I read this other SO question and answer and it seems to make sense to me, but I have one additional question to add to it.
The most upvoted answer says:
For small functions that are called frequently that can make a big performance difference.
Okay, so what would be considered a small function?
The reason that I am asking is that I am looking at using a math library, vectormath from the bullet physics framework. All their math functions are static inline but while some are fairly short some are pretty long.
Here's what I consider short:
/*
 * Copy a 3x3 matrix one column at a time.
 *
 * result: destination matrix; its col0, col1 and col2 are overwritten.
 * mat:    source matrix; only read.
 *
 * Each column is handed to vmathV3Copy, in the order col0, col1, col2.
 */
static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
{
    vmathV3Copy(&result->col0, &mat->col0);
    vmathV3Copy(&result->col1, &mat->col1);
    vmathV3Copy(&result->col2, &mat->col2);
}
but even that would embed this function 3 times:
/*
 * Member-wise copy of a 3-component vector.
 *
 * result: destination; its x, y and z members are overwritten.
 * vec:    source; only read.
 *
 * Only the x, y and z members are assigned, in that order; any other
 * member the structure may have is left untouched.
 */
static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
{
    result->x = vec->x;
    result->y = vec->y;
    result->z = vec->z;
}
Here's what seems to be long to me:
/*
 * Compute the determinant of a 4x4 matrix stored as four columns
 * (col0..col3), each with x, y, z, w components.
 *
 * mat: matrix to evaluate; only read.
 *
 * Returns the sum of each column's x component multiplied by a matching
 * 3x3 term built from the y/z/w components of the columns.
 *
 * Locals are named c<column><component>, e.g. c2w is mat->col2.w.
 */
static inline float vmathM4Determinant( const VmathMatrix4 *mat )
{
    /* Snapshot all sixteen elements into float locals. */
    const float c0x = mat->col0.x;
    const float c0y = mat->col0.y;
    const float c0z = mat->col0.z;
    const float c0w = mat->col0.w;

    const float c1x = mat->col1.x;
    const float c1y = mat->col1.y;
    const float c1z = mat->col1.z;
    const float c1w = mat->col1.w;

    const float c2x = mat->col2.x;
    const float c2y = mat->col2.y;
    const float c2z = mat->col2.z;
    const float c2w = mat->col2.w;

    const float c3x = mat->col3.x;
    const float c3y = mat->col3.y;
    const float c3z = mat->col3.z;
    const float c3w = mat->col3.w;

    /* Six 2x2 sub-determinants shared by the 3x3 terms below. */
    const float sub0 = ( ( c2z * c0w ) - ( c0z * c2w ) );
    const float sub1 = ( ( c3z * c1w ) - ( c1z * c3w ) );
    const float sub2 = ( ( c0y * c2z ) - ( c2y * c0z ) );
    const float sub3 = ( ( c1y * c3z ) - ( c3y * c1z ) );
    const float sub4 = ( ( c2y * c0w ) - ( c0y * c2w ) );
    const float sub5 = ( ( c3y * c1w ) - ( c1y * c3w ) );

    /* One 3x3 term per column; operand order is kept exactly so the
       floating-point result is unchanged. */
    const float term0 = ( ( ( c2y * sub1 ) - ( c2w * sub3 ) ) - ( c2z * sub5 ) );
    const float term1 = ( ( ( c3y * sub0 ) - ( c3w * sub2 ) ) - ( c3z * sub4 ) );
    const float term2 = ( ( ( c0w * sub3 ) + ( c0z * sub5 ) ) - ( c0y * sub1 ) );
    const float term3 = ( ( ( c1w * sub2 ) + ( c1z * sub4 ) ) - ( c1y * sub0 ) );

    return ( ( ( ( c0x * term0 ) + ( c1x * term1 ) ) + ( c2x * term2 ) ) + ( c3x * term3 ) );
}
or even this one
/*
 * Compute the inverse of a 4x4 matrix stored as four columns (col0..col3).
 *
 * result: destination; col0..col3 are written only at the very end.
 * mat:    source; every element is read into a local before anything is
 *         written to result.
 *
 * Four temporary vectors res0..res3 are built up and then each is
 * multiplied by detInv into the matching column of result.
 *
 * NOTE: detInv is computed as 1.0f / <determinant expression> with no
 * check on the denominator, so a matrix whose determinant is zero leads
 * to a division by zero here.
 *
 * NOTE(review): vmathV4SetX/Y/Z/W and vmathV4ScalarMul are defined
 * elsewhere; presumably they store one component and scale a vector by a
 * scalar respectively -- confirm against their definitions.
 */
static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
{
VmathVector4 res0, res1, res2, res3;
float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
/* Load the sixteen elements: mA..mD = col0, mE..mH = col1,
   mI..mL = col2, mM..mP = col3 (x, y, z, w order within each). */
mA = mat->col0.x;
mB = mat->col0.y;
mC = mat->col0.z;
mD = mat->col0.w;
mE = mat->col1.x;
mF = mat->col1.y;
mG = mat->col1.z;
mH = mat->col1.w;
mI = mat->col2.x;
mJ = mat->col2.y;
mK = mat->col2.z;
mL = mat->col2.w;
mM = mat->col3.x;
mN = mat->col3.y;
mO = mat->col3.z;
mP = mat->col3.w;
/* First set of 2x2 sub-determinants, built from the y/z/w components. */
tmp0 = ( ( mK * mD ) - ( mC * mL ) );
tmp1 = ( ( mO * mH ) - ( mG * mP ) );
tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
tmp3 = ( ( mF * mO ) - ( mN * mG ) );
tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
tmp5 = ( ( mN * mH ) - ( mF * mP ) );
/* res0 is complete after these four lines; its components are the same
   3x3 expressions that vmathM4Determinant multiplies by mA, mE, mI, mM. */
vmathV4SetX( &res0, ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
vmathV4SetY( &res0, ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
vmathV4SetZ( &res0, ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
vmathV4SetW( &res0, ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
/* Reciprocal of the determinant expression; no guard against zero. */
detInv = ( 1.0f / ( ( ( ( mA * res0.x ) + ( mE * res0.y ) ) + ( mI * res0.z ) ) + ( mM * res0.w ) ) );
/* Seed res1, res3 and res2 with partial products that still use the
   FIRST set of tmp values; they are finished further down, after the
   tmp variables have been overwritten. */
vmathV4SetX( &res1, ( mI * tmp1 ) );
vmathV4SetY( &res1, ( mM * tmp0 ) );
vmathV4SetZ( &res1, ( mA * tmp1 ) );
vmathV4SetW( &res1, ( mE * tmp0 ) );
vmathV4SetX( &res3, ( mI * tmp3 ) );
vmathV4SetY( &res3, ( mM * tmp2 ) );
vmathV4SetZ( &res3, ( mA * tmp3 ) );
vmathV4SetW( &res3, ( mE * tmp2 ) );
vmathV4SetX( &res2, ( mI * tmp5 ) );
vmathV4SetY( &res2, ( mM * tmp4 ) );
vmathV4SetZ( &res2, ( mA * tmp5 ) );
vmathV4SetW( &res2, ( mE * tmp4 ) );
/* Second set of 2x2 sub-determinants. tmp0..tmp5 are reused, so every
   use of the first set must stay above this point. */
tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
tmp1 = ( ( mM * mF ) - ( mE * mN ) );
tmp2 = ( ( mI * mD ) - ( mA * mL ) );
tmp3 = ( ( mM * mH ) - ( mE * mP ) );
tmp4 = ( ( mI * mC ) - ( mA * mK ) );
tmp5 = ( ( mM * mG ) - ( mE * mO ) );
/* Finish res2, res3 and res1: each component combines the seeded value
   (read back as resN.x/.y/.z/.w) with terms from the second tmp set. */
vmathV4SetX( &res2, ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.x ) );
vmathV4SetY( &res2, ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.y ) );
vmathV4SetZ( &res2, ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.z ) );
vmathV4SetW( &res2, ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.w ) );
vmathV4SetX( &res3, ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.x ) );
vmathV4SetY( &res3, ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.y ) );
vmathV4SetZ( &res3, ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.z ) );
vmathV4SetW( &res3, ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.w ) );
vmathV4SetX( &res1, ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.x ) );
vmathV4SetY( &res1, ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.y ) );
vmathV4SetZ( &res1, ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.z ) );
vmathV4SetW( &res1, ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.w ) );
/* Write the output columns: res0..res3, each combined with detInv. */
vmathV4ScalarMul( &result->col0, &res0, detInv );
vmathV4ScalarMul( &result->col1, &res1, detInv );
vmathV4ScalarMul( &result->col2, &res2, detInv );
vmathV4ScalarMul( &result->col3, &res3, detInv );
}
The guys who wrote the library obviously understand the math very well, but if you're doing a lot of math operations and the compiler is probably inlining all these functions, wouldn't you get a bigger file?