I have the following code modified from https://software.intel.com/content/www/us/en/develop/documentation/mkl-tutorial-c/top/multiplying-matrices-using-dgemm.html
! Fortran source code is found in dgemm_example.f
PROGRAM MAIN
use omp_lib
IMPLICIT NONE
DOUBLE PRECISION ALPHA, BETA
INTEGER M, K, N, I, J
integer, parameter :: dp = selected_real_kind(15, 307)
PARAMETER (M=2000, K=200, N=1000)
DOUBLE PRECISION A(M,K), B(K,N), C(M,N), AT(K,M)
PRINT *, "This example computes real matrix C=alpha*A*B+beta*C"
PRINT *, "using Intel(R) MKL function dgemm, where A, B, and C"
PRINT *, "are matrices and alpha and beta are double precision "
PRINT *, "scalars"
PRINT *, ""
PRINT *, "Initializing data for matrix multiplication C=A*B for "
PRINT 10, " matrix A(",M," x",K, ") and matrix B(", K," x", N, ")"
10 FORMAT(a,I5,a,I5,a,I5,a,I5,a)
PRINT *, ""
ALPHA = 1.0
BETA = 0.0
PRINT *, "Intializing matrix data"
PRINT *, ""
DO I = 1, M
DO J = 1, K
A(I,J) = (I-1) * K + J
! AT(J,I) = (I-1) * K + J
END DO
END DO
AT = TRANSPOSE(A)
!write (*,*) A(1,2), AT(2,1)
DO I = 1, K
DO J = 1, N
B(I,J) = -((I-1) * N + J)
END DO
END DO
DO I = 1, M
DO J = 1, N
C(I,J) = 0.0
END DO
END DO
PRINT *, "Computing matrix product using Intel(R) MKL DGEMM "
PRINT *, "subroutine"
! C := alpha*op( A )*op( B ) + beta*C,
! M*K K*N
!CALL DGEMM('N','N',M,N,K,ALPHA,A,M,B,K,BETA,C,M)
! for op(A), op(B), LDA is for A
CALL DGEMM('T','N',M,N,K,ALPHA,AT,K,B,K,BETA,C,M)
PRINT *, "Computations completed."
PRINT *, ""
PRINT *, "Top left corner of matrix A:"
PRINT 20, ((A(I,J), J = 1,MIN(K,6)), I = 1,MIN(M,6))
PRINT *, ""
PRINT *, "Top left corner of matrix B:"
PRINT 20, ((B(I,J),J = 1,MIN(N,6)), I = 1,MIN(K,6))
PRINT *, ""
20 FORMAT(6(F12.0,1x))
PRINT *, "Top left corner of matrix C:"
PRINT 30, ((C(I,J), J = 1,MIN(N,6)), I = 1,MIN(M,6))
PRINT *, ""
30 FORMAT(6(ES12.4,1x))
PRINT *, "Example completed."
STOP
END
I tried to compile it by make
MKLROOT = /home/user/intel/compilers_and_libraries_2020.3.279/linux/mkl
CFLAGS = -L${MKLROOT}/lib/intel64/libmkl_scalapack_ilp64.a -Wl,--start-group -L${MKLROOT}/lib/intel64/libmkl_gf_ilp64.a -L${MKLROOT}/lib/intel64/libmkl_gnu_thread.a -L${MKLROOT}/lib/intel64/libmkl_core.a -L${MKLROOT}/lib/intel64/libmkl_blacs_openmpi_ilp64.a -Wl,--end-group -lgomp -lpthread -lm -ldl
#blas-1.exe: blas-1.f90
dgemm.exe: dgemm.f90
# gfortran blas-1.f90 $(CFLAGS) -fopenmp -lblas -o blas-1.exe
gfortran dgemm.f90 -lblas -fopenmp -o dgemm.exe
# gfortran blas-1.f90 -lblas -fopenmp -o blas-1.exe
clean:
rm *.exe
The compiling is OK. In running ./dgemm
, it leads to Segmentation fault (core dumped)
, when there is a -fopenmp
flag in Makefile
.
I set export OMP_STACKSIZE=6000m
in my .bashrc
file
Why -fopenmp
leads to this error message? Is that due to the example follows fortran 77 style, and has some issue with -fopenmp
?
By using allocation, it solved my problem, as suggested in Why Segmentation fault is happening in this openmp code? by ceeely
real(dp), allocatable :: A(:,:), B(:,:), C(:,:), AT(:,:)
if (.not. allocated(A)) allocate(A(M,K))
if (.not. allocated(B)) allocate(B(K,N))
if (.not. allocated(C)) allocate(C(M,N))
if (.not. allocated(AT)) allocate(AT(K,M))
...
deallocate(A,B,C,AT)
possibly heap is better than stack memory.