I have write a master-slave IO function with Fortran. First, I read the file with 0 process, put the data in the array read_buffer
, and then I call subroutine"scatter_data".
I have created some communicators to scatter data from process 0 to process 0-8.
These communicators are like this:
2, 5, 8 :sub_io_communicator (In this sub_io_communicator , sub_iorank is /0, 1, 2/ )
1, 4, 7 :sub_io_communicator (In this sub_io_communicator , sub_iorank is /0, 1, 2/ )
0, 3, 6 :sub_io_communicator (In this sub_io_communicator , sub_iorank is /0, 1, 2/ )
2, 1, 0 :master_communicator
DATA
||
0 process read
||
0 call MPI_scatterv in communicator "master_communicator"
/ | \
0 1 2 call MPI_scatterv in communicator "sub_io_communicator"
/ | \ / | \ / | \
0 3 6 1 4 7 2 5 8
but when I call MPI_Scatterv, it crashes. I use "print" to debug it , and find the bug is in "call MPI_Scatterv". SO, I write a very simple MPI_Scatterv in this subroutine to see whether it will work, but it does not.
My code is like this :
SUBROUTINE scatter_data(read_buffer,ne_in)
use naqpms_nest, only : nest, nxlo, nylo, ratio, nx, ny
implicit none
include 'mpif.h'
integer, INTENT(IN) :: ne_in
integer :: ierr
integer :: location
integer :: ii, jj, kk, zz, dd, send_size,receive_size
real, INTENT(IN), dimension(nx(ne_in)*ny(ne_in)) :: read_buffer
integer, dimension(nx(ne_in)*ny(ne_in)) :: read_buffer_int
real, allocatable :: rerange_buffer(:)
integer, allocatable :: receive_buffer_int(:)
integer , allocatable :: counts_recv(:),displacements(:)
integer :: distance,left_bdy,left_bdy2,rigth_bdy,rigth_bdy2
integer :: tmp1(3), tmp2(3)
IF( sub_iorank.EQ.0 ) THEN
if(allocated(counts_recv))deallocate(counts_recv)
if(allocated(displacements))deallocate(displacements)
if(allocated(receive_buffer))deallocate(receive_buffer)
if(allocated(rerange_buffer))deallocate(rerange_buffer)
IF( master_iorank.EQ.0) THEN
allocate(counts_recv(dims(2,ne_in)))
allocate(displacements(dims(2,ne_in)))
ENDIF
print*,"location",my_rank,nx(ne_in), ey(ne_in), sy(ne_in)
receive_size = nx(ne_in) * (ey(ne_in)-sy(ne_in)+1)
allocate(receive_buffer(receive_size))
allocate(receive_buffer_int(receive_size))
allocate(rerange_buffer(receive_size))
CALL MPI_Gather(receive_size, 1, MPI_INTEGER, counts_recv, 1, MPI_INTEGER,&
0, master_communicator ,ierr )
tmp1(1:3)= (/1,1,1/)
tmp2(1:3)= (/0,1,2/)
IF(my_rank==0)print*,"counts_recv",counts_recv
CALL mpi_scatterv (counts_recv, tmp1, tmp2, MPI_INT,& ! this is just for test
receive_size, 1, mpi_int, 0, master_communicator, ierr )
print*,my_rank,receive_size
IF(my_rank.EQ.0) THEN
displacements(1)=0
do ii=2, dims(2,ne_in)
displacements(ii) = displacements(ii-1) + counts_recv(ii-1)
enddo
ENDIF
IF(my_rank==0)print*,displacements,counts_recv
CALL mpi_scatterv (read_buffer, counts_recv, displacements, mpi_real,&
receive_buffer, receive_size, mpi_real, 0, master_communicator, ierr )
IF(my_rank==0)print*,"mpi_scatterv one ok",my_rank
ENDIF!sub_iorank =0
IF(sub_iorank .EQ. 0) THEN
if(allocated(counts_recv))deallocate(counts_recv)
if(allocated(displacements))deallocate(displacements)
allocate(counts_recv(dims(1,ne_in)))
allocate(displacements(dims(1,ne_in)))
ENDIF
receive_size =(ex(ne_in)-sx(ne_in)+1)*(ey(ne_in)-sy(ne_in)+1)
CALL MPI_Gather(receive_size, 1, MPI_INTEGER, counts_recv, 1, MPI_INTEGER, 0, sub_io_communicator ,ierr )
IF(sub_iorank .EQ. 0) THEN
displacements(1)=0
do ii=2,dims(1,ne_in)
displacements(ii) = displacements(ii-1) + counts_recv(ii-1)
enddo
ENDIF
IF(sub_iorank .EQ. 0) THEN
DO dd = 1, dims(1,ne_in)
DO jj = 1, bdy_gather(4,dd,ne_in)-bdy_gather(3,dd,ne_in)+1
distance = bdy_gather(2,dd,ne_in)-bdy_gather(1,dd,ne_in)+1
left_bdy = (jj-1)*nx(ne_in) + bdy_gather(1,dd,ne_in)
rigth_bdy = (jj-1)*nx(ne_in) + bdy_gather(2,dd,ne_in)
left_bdy2 = displacements(dd) + (jj-1)*distance + 1
rigth_bdy2 = displacements(dd) + (jj-1)*distance + distance
rerange_buffer( left_bdy2 : rigth_bdy2) = receive_buffer(left_bdy : rigth_bdy )
ENDDO
ENDDO
ENDIF
if(allocated(receive_buffer))deallocate(receive_buffer)
allocate(receive_buffer(receive_size))
IF(sub_iorank .EQ. 0) print*, my_rank, counts_recv, displacements
CALL mpi_scatterv( rerange_buffer, counts_recv, displacements, mpi_real,&
receive_buffer, receive_size, mpi_real, 0, sub_io_communicator,ierr)
IF(sub_iorank .EQ. 0) print*,"mpi_scatterv ok"
END SUBROUTINE scatter_data
I run this code : mpirun -np 9 ./gnaqpms.v1.6.0_jx0307.exe
then, the error in the log file is like this :
location 0 88 26 1
location 1 88 52 27
location 2 88 77 53
counts_recv 2288 2288 2200
1 2288
2 2200
*** Error in forrtl: error (76): Abort trap signal
Image PC Routine Line Source
gnaqpms.v1.6.0_jx 00000000007A5F3A Unknown Unknown Unknown
libpthread-2.17.s 00002BA00DADA5D0 Unknown Unknown Unknown
libc-2.17.so 00002BA00E01F207 gsignal Unknown Unknown
libc-2.17.so 00002BA00E0208F8 abort Unknown Unknown
libc-2.17.so 00002BA00E061D27 Unknown Unknown Unknown
libc-2.17.so 00002BA00E06A489 Unknown Unknown Unknown
libmpi.so.12.0 00002BA00CAC2AED Unknown Unknown Unknown
libmpi.so.12.0 00002BA00CAC4A54 Unknown Unknown Unknown
libmpi.so.12 00002BA00CAC3188 MPI_Scatterv Unknown Unknown
libmpifort.so.12. 00002BA00D445A7A mpi_scatterv Unknown Unknown
gnaqpms.v1.6.0_jx 0000000000475EA4 naqpms_parallel_m 1269 naqpms_parallel.f90
gnaqpms.v1.6.0_jx 00000000005953BF rd_met_pyramid_ 151 rd_met_pyramid.f90
gnaqpms.v1.6.0_jx 0000000000617709 read_data_ 61 naqpms_readdata.f90
gnaqpms.v1.6.0_jx 0000000000647BA4 naqpms_calc_mp_ca 141 naqpms_calc.f90
gnaqpms.v1.6.0_jx 000000000065A835 MAIN__ 86 main.f90
gnaqpms.v1.6.0_jx 000000000040B45E Unknown Unknown Unknown
libc-2.17.so 00002BA00E00B3D5 __libc_start_main Unknown Unknown
gnaqpms.v1.6.0_jx 000000000040B369 Unknown Unknown Unknown