I know this has been answered many times before and there is a comprehensive answer here which I have read and attempted to use but I just can't get my code to work for some reason.
I have stripped my code down a bit to make it a bit easier to follow, but basically what I am trying to do is have each process initialise a sub-array and work on it, then put the whole big array back together on rank 0. MPI_Gatherv
is giving me a segfault and I cannot figure out why.
Any help would be greatly appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#define N 32
void init_lattice(double **site, int row, int col){
int i,j;
for(i=0; i<row; i++){
for(j=0; j<col; j++){
site[i][j]=(drand48()/4294967295.0 + 0.5)*2*M_PI;
}
}
}
int main(int argc, char *argv[]){
int nprocs, rank;
MPI_Init(&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
int dim = 2;
int grid[dim];
grid[0]=0;
grid[1]=0;
// Assign the grid dimensions
MPI_Dims_create(nprocs, dim, grid);
printf("Dim grid: length: %d, width: %d\n", grid[0], grid[1]);
// The new communicator
MPI_Comm comm_grid;
// Allow cyclic behavior
int periodic[dim];
periodic[0] = 1;
periodic[1] = 1;
// Create the communicator
MPI_Cart_create(MPI_COMM_WORLD, dim, grid, periodic, 0, &comm_grid);
int block_len, block_width;
block_len = N/grid[1];
block_width = N/grid[0];
int i, j;
//Create lattice subset
double *data = (double *) malloc (block_len * block_width * sizeof(double));
double **site = (double **) malloc (block_len * sizeof(double *));
for (i = 0; i < block_len; i++)
site[i] = & (data[i * block_width]);
//Initialise lattice
init_lattice(site, block_len, block_width);
MPI_Datatype newtype, subtype;
int sizes[dim];
sizes[0]=N;
sizes[1]=N;
int subsizes[dim];
subsizes[0] = block_len;
subsizes[1] = block_width;
int starts[dim];
starts[0] = 0;
starts[1] = 0;
MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &newtype);
MPI_Type_create_resized(newtype, 0, N/grid[1]*sizeof(double), &subtype);
MPI_Type_commit(&subtype);
int sendcounts[grid[0]*grid[1]];
int displs[grid[0]*grid[1]];
if (rank == 0) {
for (i=0; i<grid[0]*grid[1]; i++) sendcounts[i] = 1;
int disp = 0;
for (i=0; i<grid[0]; i++) {
for (j=0; j<grid[1]; j++) {
displs[i*grid[0]+j] = disp;
disp += 1;
}
disp += ((N/grid[1])-1)*grid[0];
}
}
//Create global lattice
double *global_data = (double *) malloc (N * N * sizeof(double));
double **global_site = (double **) malloc (N * sizeof(double *));
for (i = 0; i < N; i++)
global_site[i] = & (global_data[i * N]);
MPI_Gatherv(&(site[0][0]), N*N/(grid[0]*grid[1]), MPI_DOUBLE, &(global_site[0][0]), sendcounts, displs, subtype, 0, MPI_COMM_WORLD);
if(rank==0){
printf("Rank: %d\n", rank);
for(i=0; i<N; i++){
for(j=0; j<N; j++){
printf("%.2lf ", global_site[i][j]);
}
printf("\n");
}
}
return 0;
}
EDIT: Ok so I have changed my array allocations to contiguous memory and everything is working as it should now. Thanks talonmies!