I'm looking for a way to overload operator[] (within a broader SIMD class) to facilitate reading and writing individual elements within a SIMD word (e.g. __m512i). A couple constraints:
- Compliant with C++11 (or later)
- Compatible with additional intrinsics based code
- Not OpenCL/SYCL (which I could, but I can't *sigh*)
- Mostly portable across g++, icpc, clang++
- Preferably applicable to other SIMD beyond Intel (ARM, IBM, etc...)
- (edit) Performance isn't really an issue (not generally used in places where performance matters)
(This rules out things like type punning through pointer casting, and GCC vector types.)
Based heavily on Scott Meyers' "More Effective C++" (Item 30), and other code I've come up with the following MVC code that seems "right", that seems to work, but also seems over complicated. (The "proxy" approach is meant to deal with the left/right hand operator[] usage, and the "memcpy" is meant to deal with the type punning/C++ standard issue.)
I'm wonder if someone has a better solution (and can explain it so I learn something ;^))
#include <iostream>
#include <cstring>
#include "immintrin.h"
using T = __m256i; // SIMD type
using Te = unsigned int; // SIMD element type
class SIMD {
class SIMDProxy;
public :
const SIMDProxy operator[](int index) const {
std::cout << "SIMD::operator[] const" << std::endl;
return SIMDProxy(const_cast<SIMD&>(*this), index);
}
SIMDProxy operator[](int index){
std::cout << "SIMD::operator[]" << std::endl;
return SIMDProxy(*this, index);
}
Te get(int index) {
std::cout << "SIMD::get" << std::endl;
alignas(T) Te tmp[8];
std::memcpy(tmp, &value, sizeof(T)); // _mm256_store_si256(reinterpret_cast<__m256i *>(tmp), c.value);
return tmp[index];
}
void set(int index, Te x) {
std::cout << "SIMD::set" << std::endl;
alignas(T) Te tmp[8];
std::memcpy(tmp, &value, sizeof(T)); // _mm256_store_si256(reinterpret_cast<__m256i *>(tmp), c.value);
tmp[index] = x;
std::memcpy(&value, tmp, sizeof(T)); // c.value = _mm256_load_si256(reinterpret_cast<__m256i const *>(tmp));
}
void splat(Te x) {
alignas(T) Te tmp[8];
std::memcpy(tmp, &value, sizeof(T));
for (int i=0; i<8; i++) tmp[i] = x;
std::memcpy(&value, tmp, sizeof(T));
}
void print() {
alignas(T) Te tmp[8];
std::memcpy(tmp, &value, sizeof(T));
for (int i=0; i<8; i++) std::cout << tmp[i] << " ";
std::cout << std::endl;
}
protected :
private :
T value;
class SIMDProxy {
public :
SIMDProxy(SIMD & c_, int index_) : c(c_), index(index_) {};
// lvalue access
SIMDProxy& operator=(const SIMDProxy& rhs) {
std::cout << "SIMDProxy::=SIMDProxy" << std::endl;
c.set(rhs.index, rhs.c.get(rhs.index));
return *this;
}
SIMDProxy& operator=(Te x) {
std::cout << "SIMDProxy::=T" << std::endl;
c.set(index,x);
return *this;
}
// rvalue access
operator Te() const {
std::cout << "SIMDProxy::()" << std::endl;
return c.get(index);
}
private:
SIMD& c; // SIMD this proxy refers to
int index; // index of element we want
};
friend class SIMDProxy; // give SIMDProxy access into SIMD
};
/** a little main to exercise things **/
int
main(int argc, char *argv[])
{
SIMD x, y;
Te a = 3;
x.splat(1);
x.print();
y.splat(2);
y.print();
x[0] = a;
x.print();
y[1] = a;
y.print();
x[1] = y[1];
x.print();
}