I have two functions that multiply 2D arrays (matrices). One of them uses SSE; the other has no optimization at all. Both functions work, but their results differ slightly: for example, 20.333334 versus 20.333332.
Can you explain why the results are different? And what can I change in the functions so that they produce the same result?
Function with SSE:
float** sse_multiplication(float** array1, float** array2, float** arraycheck)
{
int i, j, k;
float *ms1, *ms2, result;
float *end_loop;
for( i = 0; i < rows1; i++)
{
for( j = 0; j < columns2; j++)
{
result = 0;
ms1 = array1[i];
ms2 = array2[j];
end_loop = &array1[i][columns1];
__asm{
mov rax, ms1
mov rbx, ms2
mov rdx, end_loop
xorps xmm2, xmm2
loop:
movups xmm0, [rax]
movups xmm1, [rbx]
movups xmm3, [rax+16]
movups xmm4, [rbx+16]
mulps xmm0, xmm1
mulps xmm3, xmm4
addps xmm2, xmm0
add rax, 32
add rbx, 32
cmp rdx, rax
jne loop
haddps xmm2, xmm2
haddps xmm2, xmm2
movups result, xmm2
}
arraycheck[i][j] = result;
}
}
return arraycheck;
}
Function without any optimization:
float** multiplication(float** array1, float** array2, float** arraycheck)
{
for (int i = 0; i < rows1; i++)
for (int j = 0; j < columns2; j++)
for (int k = 0; k < rows1; k++)
arraycheck[i][j] += array1[i][k] * array2[k][j];
return arraycheck;
}