When I use AVX to perform GEMM, I encountered such problem: when using 2d array created on stack, there won't be any error, but if I use array created on heap, then segmentation fault will happen.
This code works well.
double A[2048][2048];
double B[2048][2048];
double C[2048][2048];
__m256d vec_res = _mm256_setzero_pd();
__m256d vec_1 = _mm256_setzero_pd();
__m256d vec_2 = _mm256_setzero_pd();
for (int i = 0; i < M; i++){
for (int j = 0; j < M; j++){
vec_1 = _mm256_set1_pd(A[i][j]);
for (int k = 0; k < M; k += 4){
vec_2 = _mm256_load_pd(&B[j][k]);
vec_res = _mm256_load_pd(&C[i][k]);
vec_res = _mm256_add_pd(vec_res ,_mm256_mul_pd(vec_1, vec_2));
_mm256_store_pd(&C[i][k], vec_res);
}
}
}
But when I create array on heap, then segmentation fault will happen:
void Malloc2D(int height, int width, double ** & Mat)
{
Mat = new double * [height];
for(int i = 0; i < height; i++)
{
Mat[i] = new double [width];
}
}
int main()
{
int M, N, K;
M = N = K = 1024;
double ** A = nullptr;
double ** B = nullptr;
double ** C = nullptr;
Malloc2D(M, N, A);
Malloc2D(N, K, B);
Malloc2D(M, K, C);
__m256d vec_res = _mm256_setzero_pd();
__m256d vec_1 = _mm256_setzero_pd();
__m256d vec_2 = _mm256_setzero_pd();
for (int i = 0; i < M; i++){
for (int j = 0; j < M; j++){
vec_1 = _mm256_set1_pd(A[i][j]);
for (int k = 0; k < M; k += 4){
vec_2 = _mm256_load_pd(&B[j][k]);
vec_res = _mm256_load_pd(&C[i][k]);
vec_res = _mm256_add_pd(vec_res ,_mm256_mul_pd(vec_1, vec_2));
_mm256_store_pd(&C[i][k], vec_res);
}
}
}
return 0;
}
The error code is
Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)
And IDE detected that error happened with this assembly code
vmovapd %ymm0,(%rdx)
Should I use 1d array instead of 2d?