I have an input array of length 32 with 16-bit precision.
Consider:
// 32 signed 16-bit values, aligned to a 32-byte boundary.
// Written out as four rows of eight elements each.
__attribute__((aligned(32))) short inp[32] = {
     -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,
      9,  10,  11,  12,  13,  14,  15,  16,
    -17, -18, -19, -20, -21, -22, -23, -24,
     25,  26,  27,  28,  29,  30,  31,  32
};
// Holds the eight 16-bit results.
short res[8];
I have to compute:
res = min(inp(0:7), inp(8:15));
To do this, I tried a few things, which are shown below:
// Method 1
__m256i* avxinp0 = (__m256i*) inp;
__m256i* avxinp1 = (__m256i*) (inp+8);
__m256i avxres = _mm256_min_epi16(*avxinp0, *avxinp1);
memcpy(res, (short*)avxres, 8*sizeof(short));
// Method 2
__m256i* avxinp0 = (__m256i*) inp0;
__m256i* avxinp1 = (__m256i*) (inp0+8);
__m256i mem0;
_mm256_store_si256(&mem0, *avxinp1);
__m256i avxres = _mm256_min_epi16(*avxinp0, mem0);
memcpy(res, (short*)avxres, 8*sizeof(short));
Both give a segmentation fault while compiling. GCC is used to compile. What is the reason for it? Are there any ways to vectorize it?