I want to use MPI_Scatterv to distribute a 3D array in MPI Fortran using derived data types. I know how to do it by flattening the array, as shown in the code below. Is it possible to scatter the array without flattening it, by using derived data types?
In the example below I scatter a (5,3,2) 3D array among, say, 2 processes: first I flatten the 3D array, then I scatter the flattened copy among the processes. I want to achieve the same result without flattening, so that I can avoid creating temporary arrays for the flattened global and local data.
I have also included an attempt that uses MPI_Type_create_subarray to scatter the sub-arrays directly, but it only works when the sub-array sizes are equal: distributing (4,3,2) among two processes as (2,3,2) and (2,3,2) works, while distributing (5,3,2) as (3,3,2) and (2,3,2) does not. How can I make this work when the sub-array sizes differ?
!Example code to Scatter 3D array using MPI subarray type
program ex_scatterv
use mpi
use iso_fortran_env, only : real64
implicit none
real(real64), allocatable,dimension(:,:,:) :: array, array_local
real(real64), allocatable,dimension(:) :: array_flat, array_local_flat
integer :: rank, num_procs, mpierr, i, j, k
integer :: nx, ny, nz, str_idx, end_idx, local_size, local_size_flat
integer, dimension(:), allocatable :: sendcounts, displacements
integer :: sizes(3), sub_sizes(3), starts(3), recv_starts(3), recv_sizes(3), &
     send_type, resize_send_type, recv_type
integer(kind=MPI_ADDRESS_KIND) :: lb, extent
call mpi_init(mpierr)
call mpi_comm_size(mpi_comm_world, num_procs, mpierr)
call mpi_comm_rank(mpi_comm_world, rank, mpierr)
nx=5
ny=3
nz=2
!allocate in the root rank
if(rank==0) then
   allocate(array(nx,ny,nz))
   allocate(array_flat(nx*ny*nz))
else !for other procs allocate with zero size
   allocate(array(0,0,0))
   allocate(array_flat(0))
endif
!assign values to the array
if(rank==0) then
   do k=1,nz
      do j=1,ny
         do i=1,nx
            array(i,j,k) = (i-1)+(j-1)*nx+(k-1)*nx*ny
         end do
      end do
   end do
   print*, "Before scattering..."
   print*, array
   !flatten the 3D array; this index order makes each i-slab (a fixed range of i)
   !a contiguous block of ny*nz values in array_flat
   forall(k=1:nz, j=1:ny, i=1:nx) array_flat(k+(j-1)*nz+(i-1)*ny*nz)=array(i,j,k)
endif
!distribute the 3d array among different procs
call distribute_points(nx, rank, num_procs, str_idx, end_idx)
local_size = end_idx - str_idx + 1
local_size_flat = local_size*ny*nz
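!e.g. for nx=5 and 2 ranks this gives local_size = 3 on rank 0 and 2 on rank 1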
!allocate local(for each rank) arrays
allocate(array_local_flat(local_size_flat))
allocate(array_local(local_size, ny, nz))
!allocate sendcounts and displacements arrays for the scatter
allocate(sendcounts(num_procs), displacements(num_procs))
!gather displacements and sendcounts for all ranks
call MPI_Allgather(str_idx, 1, MPI_INTEGER, displacements, 1, MPI_INTEGER, &
     MPI_COMM_WORLD, mpierr)
call MPI_Allgather(local_size, 1, MPI_INTEGER, sendcounts, 1, &
     MPI_INTEGER, MPI_COMM_WORLD, mpierr)
!total sendcounts and displacements
sendcounts = sendcounts*ny*nz
displacements = displacements - 1 !convert to zero-based indexing (MPI/C convention)
displacements = displacements*ny*nz
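!sendcounts and displacements are now in numbers of real elements of the
!flattened array: one contiguous chunk of local_size*ny*nz values per rank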
!scatter the flattened array among procs
call MPI_Scatterv(array_flat, sendcounts, displacements, MPI_DOUBLE_PRECISION, &
     array_local_flat, local_size*ny*nz, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, &
     mpierr)
!form 3D array from flattened local array
forall(k=1:nz, j=1:ny, i=1:local_size) array_local(i,j,k) = &
     array_local_flat(k+(j-1)*nz+(i-1)*ny*nz)
print*, "Scattered array: ", rank
print*, array_local
!Scattering using the subarray type
sizes = [nx, ny, nz]
sub_sizes = [local_size, ny, nz]
starts = [0, 0, 0]
recv_starts = [0, 0, 0]
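!starts is zero-based; the same send subarray (anchored at the array origin)
!is reused for every rank, with each rank's offset coming from the Scatterv displacements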
!to get extent of MPI_DOUBLE_PRECISION
call MPI_Type_get_extent(MPI_DOUBLE_PRECISION, lb, extent, mpierr)
call MPI_Type_create_subarray(3, sizes, sub_sizes, starts, &
     MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, send_type, mpierr)
call MPI_Type_create_resized(send_type, 0_MPI_ADDRESS_KIND, local_size*extent, &
     resize_send_type, mpierr)
call MPI_Type_commit(resize_send_type, mpierr)
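!resizing shrinks the type's extent to local_size reals, so a displacement of p
!reaches p*local_size reals into the array, the start of rank p's slab when all
!slabs have the same width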
call MPI_Type_create_subarray(3, sub_sizes, sub_sizes, recv_starts, &
     MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, recv_type, mpierr)
call MPI_Type_commit(recv_type, mpierr)
sendcounts=1
call MPI_Allgather(rank, 1, MPI_INTEGER, displacements, 1, MPI_INTEGER, &
     MPI_COMM_WORLD, mpierr)
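!displacements are the rank numbers, counted in units of the resized send type's
!extent, so every rank is assumed to receive an identically shaped block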
call MPI_Scatterv(array, sendcounts, displacements, resize_send_type, array_local, &
     1, recv_type, 0, MPI_COMM_WORLD, mpierr)
print*, "Scattered array with subarray: ", rank
print*, array_local
call MPI_Finalize(mpierr)
contains
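!distribute npts points over size ranks as evenly as possible:
!the first mod(npts, size) ranks get one extra point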
subroutine distribute_points(npts, rank, size, start_idx, end_idx)
   implicit none
   integer, intent(in) :: npts, size, rank
   integer, intent(out) :: start_idx, end_idx
   integer :: pts_per_proc
   pts_per_proc = npts/size
   if(rank < mod(npts, size)) then
      pts_per_proc = pts_per_proc + 1
   end if
   if(rank < mod(npts, size)) then
      start_idx = rank * pts_per_proc + 1
      end_idx = (rank + 1) * pts_per_proc
   else
      start_idx = mod(npts, size) + rank*pts_per_proc + 1
      end_idx = mod(npts, size) + (rank + 1) * pts_per_proc
   end if
end subroutine distribute_points
end program ex_scatterv
I want to do this without flattening the array and without creating temporary arrays.