I'm trying to compare two __m256i
(4 packed 64-bit integers). To do so, I use the _mm256_cmpgt_epi64 function.
The function works as expected except for a few comparisons, as if the function did not consider the largest bit of the 64-bit integer. As shown below in the second and third comparisons.
Here a MCVE, where I expect each 64-bit integer from a
to be larger than its peer from b
(thus cp
should be 0xFFF...FFF
):
#include <immintrin.h>
#include <x86intrin.h>
#include <stdio.h>
#include <inttypes.h>
// gcc mcve.c -mavx2 -madx && ./a.out
int print_avx2_hex256(__m256i ymm)
{
const size_t n = sizeof(__m256i) / sizeof(u_int64_t);
u_int64_t buffer[n];
_mm256_storeu_si256((__m256i*)buffer, ymm);
for (int i=0; i<n; ++i)
printf("%016"PRIx64" ", buffer[i]);
printf("\n");
return 0;
}
int compare(__m256i a, __m256i b)
{
__m256i cp = _mm256_cmpgt_epi64(a,b);
print_avx2_hex256(cp); // Comparison
print_avx2_hex256(a);
print_avx2_hex256(b);
return 0;
}
int main()
{
u_int64_t _a[4] = {0xf, 0xf000000000000000, 0xd00000000000000d, 0x0f00000000000000};
u_int64_t _b[4] = {0x2, 0x2000000000000000, 0x2000000000000002, 0x0200000000000000};
__m256i a = _mm256_setr_epi64x(_a[0], _a[1], _a[2], _a[3]);
__m256i b = _mm256_setr_epi64x(_b[0], _b[1], _b[2], _b[3]);
compare(a,b);
return 0;
}
However my output is as follow (in order cp
, a
, b
):
ffffffffffffffff 0000000000000000 0000000000000000 ffffffffffffffff
000000000000000f f000000000000000 d00000000000000d 0f00000000000000
0000000000000002 2000000000000000 2000000000000002 0200000000000000
I'm not familiar with the Intel Intrinsics functions, so if someone can tell me what I'm doing wrong, it would be appreciated :)