Given a value in an avx2 register, I would like to mask (with AND) then rotate by k bits There does not appear to be a rotate instruction for the entire 256 bits but there is for each 64 bits:
// this is the desired bit pattern
// ...0111110111110111110111110111110
// set every kth bit to 0
inline __m256i setkthzero(const uint32_t k) {
const uint64_t rotate_by = 64 % k; // each 64 bit word shifts
__m256i t = set1();
// for (uint32_t i = 0; i < 256; i += k) {
// t &= ~(1 << i); // obviously not AVX2, how to do this?
//}
uint64_t ta = 0xFFFFFFFFFFFFFFFEULL; // low bit set to zero
uint64_t tb = (ta << rotate_by) | (ta >> (64-rotate_by)); // c++ rotate
uint64_t tc = (tb << rotate_by) | (tb >> (64-rotate_by)); // c++ rotate
uint64_t td = (tc << rotate_by) | (tc >> (64-rotate_by)); // c++ rotate
__m256i mask = _mm256_set_epi64x(td, tc, tb, ta);
for (uint32_t i = 64; i > 0; i -= k) {
__m256i shift = _mm256_rol_epi64(mask, k);
mask = _mm256_and_si256(mask, shift);
}
return mask;
}
I cannot test the above code because my CPU does not support avx512 for the _mm256_rol_epi64. So first question, is there some other way in avx2 where I can reasonably do this, and second, once I have these masks, how can I rotate the entire mask by m bits?
From what I gather there isn't a single instruction, but is there any way to construct the operation? I can't think of one.
__m256i mask = setkthzero(6);
// advance to next position...
t = rol(mask, 2); // how to rotate mask by m=2 bits?
...
t = rol(mask, 1);
__m256i mask2 = setkthzero(10);
t = rol(mask2, 2);
...
t = rol(mask2, 4);
...
t = rol(mask2, 6);