You are pessimizing your code with move constructors here. The matrix addition can be done safely without any move constructors at all, and compilers are clever enough to optimize the temporaries away.
Here is some test code to prove what I'm saying:
#include <stddef.h>
#include <stdint.h>
/// Minimal 3x3 matrix of floats used to inspect the code a compiler generates
/// for chained operator+ expressions that return their result by value.
class Matrix3
{
public:
    /// Matrix elements, addressed as Mtx[i][j] with i and j in [0, 3).
    float Mtx[3][3];

    /// Creates a matrix whose nine elements are all zero.
    ///
    /// The array is value-initialised on purpose: with an empty constructor
    /// the elements would be indeterminate, and both operator+ and
    /// GetResult() read every element, which would be undefined behaviour.
    inline Matrix3() : Mtx() {}

    /// Element-wise sum of this matrix and @p Matrix.
    ///
    /// @param Matrix right-hand operand; it is not modified.
    /// @return a new matrix where each element is the sum of the two
    ///         corresponding input elements.
    inline Matrix3 operator+( const Matrix3& Matrix ) const
    {
        Matrix3 Result;

        for ( size_t i = 0; i != 3; ++i )
        {
            for ( size_t j = 0; j != 3; ++j )
            {
                Result.Mtx[i][j] = Mtx[i][j] + Matrix.Mtx[i][j];
            }
        }

        // Returned by value; no explicit move handling is written here.
        return Result;
    }

    /// Sums all nine elements after converting each one to int.
    ///
    /// Each element is converted individually (the fractional part is
    /// discarded) before being added, so the result is not the same as
    /// converting the floating-point total.
    ///
    /// @return the integer sum of the converted elements.
    virtual int GetResult() const
    {
        int Result = 0;

        for ( size_t i = 0; i != 3; ++i )
        {
            for ( size_t j = 0; j != 3; ++j )
            {
                Result += static_cast<int>( Mtx[i][j] );
            }
        }

        return Result;
    }
};
int main()
{
Matrix3 M;
Matrix3 M1;
Matrix3 M2;
Matrix3 M3;
Matrix3 M4;
M = M1 + M2 + M3 + M4;
return M.GetResult();
}
I use GCC: (GNU) 4.9.0 20131110 (experimental)
as follows: g++ -O3 main.cpp -S
The output assembly looks as below:
_main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $176, %esp
call ___main
fnstcw 14(%esp)
fldz
fadd %st(0), %st
fadds LC0
fadds LC0
fsts 140(%esp)
movl 140(%esp), %eax
fsts 144(%esp)
movl %eax, 20(%esp)
movl 144(%esp), %eax
fsts 148(%esp)
movl %eax, 24(%esp)
fsts 152(%esp)
movl 148(%esp), %eax
fsts 156(%esp)
movl %eax, 28(%esp)
fsts 160(%esp)
movl 152(%esp), %eax
fsts 164(%esp)
movl %eax, 32(%esp)
fsts 168(%esp)
movl 156(%esp), %eax
fstps 172(%esp)
movl %eax, 36(%esp)
movl 160(%esp), %eax
flds 24(%esp)
movl %eax, 40(%esp)
movl 164(%esp), %eax
movl %eax, 44(%esp)
movl 168(%esp), %eax
movl %eax, 48(%esp)
movl 172(%esp), %eax
movl %eax, 52(%esp)
movzwl 14(%esp), %eax
movb $12, %ah
movw %ax, 12(%esp)
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %edx
flds 20(%esp)
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 28(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 32(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 36(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 40(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 44(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 48(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
flds 52(%esp)
addl %eax, %edx
fldcw 12(%esp)
fistpl 8(%esp)
fldcw 14(%esp)
movl 8(%esp), %eax
leave
addl %edx, %eax
ret
There is not a single trace of any copy/move constructor or any function call at all. Everything is unrolled into a fast math-grinding stream of instructions.
Seriously, there is no need to write additional handlers for r-values here. The compiler generates perfect code without them.