I'm trying to use avx-512 to do matrix transpose. But now my code can only transpose square matrix. If I don't specify the array size of A(for example 100) before calling the function, there will be *** stack smashing detected ***: terminated Aborted (core dumped).
#include <immintrin.h>
#include <complex.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
void matrix_transpose_avx(float *matA_re, float *matA_im, int rowA, int colA){
float *temp_re = (float *)malloc(rowA * colA * sizeof(float));
float *temp_im = (float *)malloc(rowA * colA * sizeof(float));
memcpy(temp_re, matA_re, (rowA * colA * sizeof(float)));
memcpy(temp_im, matA_im, (rowA * colA * sizeof(float)));
__m512 re_vec, im_vec;
for (int i = 0; i < rowA; ++i){
for (int j = 0; j < colA; ++j){
re_vec = _mm512_loadu_ps(&temp_re[j * rowA + i]);
_mm512_storeu_ps(&matA_re[i * colA + j], re_vec);
im_vec = _mm512_loadu_ps(&temp_im[j * rowA + i]);
_mm512_storeu_ps(&matA_im[i * colA + j], im_vec);
}
}
free(temp_re);
free(temp_im);
}
int main(){
int rowA = 3;
int colA = 2;
float A_re[100] = {1, 2, 4, 0, 3, 6};
float A_im[100] = {1, 2, 4, 0, 3, 6};
matrix_transpose_avx(A_re, A_im, rowA, colA);
return 0;
}
I hope someone can tell me how to modify the code, so the function can transpose any matrix sucessfully and no need to specify the array size of A(for example 100) before performing the transpose operation. Without altering the function's defined parameters when defining the function.
I really need Proficient coder's help. Thanks in advance.