I'm trying to compare two vector with sse2, but it seems that the _mm256_testz_si256
function doesn't work as expected.
bool is_equal(const std::vector<uint64_t>& vec1, const std::vector<uint64_t>& vec2, std::size_t s) {
uint64_t* data1 = const_cast<uint64_t*>(vec1.data());
uint64_t* data2 = const_cast<uint64_t*>(vec2.data());
__m256i v1, v2, v3, v4;
size_t start = 0;
while (start < s) {
v1 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(data1 + start));
v2 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(data2 + start));
__m256i r1 = _mm256_cmpeq_epi64(v1, v2);
bool mask = _mm256_testz_si256(r1, r1) == 0;
if (!mask) return false;
start += 4;
}
return true;
}
std::vector<uint64_t> vec = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 12};
std::vector<uint64_t> vec2 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
std::cout << is_equal(vec, vec2, vec.size()) << std::endl;
I'm not familiar with the Intel Intrinsics functions, so if someone can tell me what I'm doing wrong, it would be appreciated.