I have the loops below, in which I calculate the array 'tab'. I tried using the OpenMP reduction clause, but it doesn't work: I get a segmentation fault whenever OMP_NUM_THREADS is greater than 1.
What am I doing wrong?
Regards.
! Accumulate contributions from every ii into tab, with the ii loop shared
! among the OpenMP threads.
!
! REDUCTION(+:tab) gives each thread its own private copy of the whole tab
! array, initialised to zero, and adds the copies back into the original tab
! when the construct ends.
!
! NOTE(review): many implementations place these private copies on each
! thread's stack. If tab is large, that can overflow the stack and would
! explain a crash that only appears with more than one thread. Presumably
! raising the per-thread stack limit (OMP_STACKSIZE, and the shell stack
! limit for the initial thread) avoids it - confirm on the target system.
!$OMP PARALLEL DO DEFAULT(SHARED) &
!$OMP PRIVATE(ii,ix_tab,iy_tab,ipx,ipy,iw,C) &
!$OMP REDUCTION(+:tab)
do ii = 1,N
ix_tab = ...
iy_tab = ...
do ipy = -npy_max,npy_max
do ipx = -npx_max,npx_max
! iw is the first index of tab, Fx, Fy and A, so the innermost loop walks
! all four arrays along their leading dimension.
do iw = 1, M
C = Fx(iw,ipx,ix_tab) * Fy(iw,ipy,iy_tab)
tab(iw,ipx,ipy) = tab(iw,ipx,ipy) + A(iw,ii) * C
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
--- EDIT ---
Ok, here is my solution:
! Manual array reduction: every thread accumulates into its own slab
! wrk(:,:,:,tid) on the heap, and the slabs are summed into tab afterwards.
!
! NOTE(review): assumes nthreads is at least the number of threads that
! actually execute the parallel region (e.g. taken from
! omp_get_max_threads()); a smaller value would index wrk out of bounds.
! NOTE(review): the first extent of wrk is nt while the inner loop runs
! iw = 1, M; presumably M <= nt and nt matches the first extent of tab -
! confirm.
allocate(wrk(nt,-npx_max:npx_max,-npy_max:npy_max,nthreads))
! The slabs are accumulated into, so they must start from zero. Zeroing the
! whole array also covers slabs of threads that end up doing no work.
wrk = 0
! A plain PARALLEL region is required here: the tid assignment has to run
! once per thread before the work-shared loop, which PARALLEL DO would not
! allow (it must be followed directly by the do loop).
!$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(tid,ii,ix_tab,iy_tab,ipx,ipy,iw,C)
! Thread numbers start at 0; shift to a 1-based slab index.
tid = OMP_GET_THREAD_NUM() + 1
!$OMP DO
do ii = 1,N
  ix_tab = ...
  iy_tab = ...
  do ipy = -npy_max,npy_max
    do ipx = -npx_max,npx_max
      ! iw is the leading index of every array touched in this loop.
      do iw = 1, M
        C = Fx(iw,ipx,ix_tab) * Fy(iw,ipy,iy_tab)
        wrk(iw,ipx,ipy,tid) = wrk(iw,ipx,ipy,tid) + A(iw,ii) * C
      enddo
    enddo
  enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
! Serial final reduction: add each thread's slab into tab.
do tid = 1, nthreads
  tab(:,:,:) = tab(:,:,:) + wrk(:,:,:,tid)
enddo
deallocate(wrk)
Can it be done better or faster?
Regards.