I have a C code snippet using MPI as follows:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
float **p=NULL, **buffer=NULL;
int it, nt=3, i, j, k, NP, MYID, nx=1, nz=2, nsrc=3, isrc;
MPI_Init ( &argc, &argv );
MPI_Comm_size ( MPI_COMM_WORLD, &NP );
MPI_Comm_rank ( MPI_COMM_WORLD, &MYID );
p = (float **)calloc(nz,sizeof(float *));
for (i=0;i<nz;i++) p[i] = (float *)calloc(nx,sizeof(float));
buffer = (float **)calloc(nz,sizeof(float *));
for (i=0;i<nz;i++) buffer[i] = (float *)calloc(nx,sizeof(float));
for (it=0; it<nt; it++){
for (isrc=MYID; isrc<nsrc; isrc+=NP){
for (j=0; j<nz; j++){
for (i=0; i<nx; i++){
p[j][i] += 1.5 + (float)(isrc) + (float)(j);
}
}
}
for (k=0;k<nsrc-1;k++){
if (MYID==k){
buffer = p; /*swap pointer*/
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&buffer[0][0],nx*nz,MPI_FLOAT,k,MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
for (j=0; j<nz; j++){
for (i=0; i<nx; i++){
printf("it=%d,k=%d,Node %d,buffer[%d][%d]=%f\n",it,k,MYID,j,i,buffer[j][i]);
}
}
}
}
MPI_Finalize();
exit(0);
}
If you run it with 3 cores mpirun -np 3 ./main
, it will give wrong results:
it=0,k=0,Node 0,buffer[0][0]=1.500000
it=0,k=0,Node 0,buffer[1][0]=2.500000
it=0,k=1,Node 0,buffer[0][0]=2.500000
it=0,k=1,Node 0,buffer[1][0]=3.500000
it=0,k=0,Node 1,buffer[0][0]=1.500000
it=0,k=0,Node 1,buffer[1][0]=2.500000
it=0,k=1,Node 1,buffer[0][0]=2.500000
it=0,k=1,Node 1,buffer[1][0]=3.500000
it=1,k=0,Node 1,buffer[0][0]=4.000000
it=1,k=0,Node 1,buffer[1][0]=6.000000
it=0,k=0,Node 2,buffer[0][0]=1.500000
it=0,k=0,Node 2,buffer[1][0]=2.500000
it=0,k=1,Node 2,buffer[0][0]=2.500000
it=0,k=1,Node 2,buffer[1][0]=3.500000
it=1,k=0,Node 2,buffer[0][0]=4.000000
it=1,k=0,Node 2,buffer[1][0]=6.000000
it=1,k=1,Node 2,buffer[0][0]=4.000000
it=1,k=0,Node 0,buffer[0][0]=4.000000
it=1,k=0,Node 0,buffer[1][0]=6.000000
it=1,k=1,Node 0,buffer[0][0]=4.000000
it=1,k=1,Node 0,buffer[1][0]=6.000000
it=1,k=1,Node 1,buffer[0][0]=4.000000
it=1,k=1,Node 1,buffer[1][0]=6.000000
it=2,k=0,Node 1,buffer[0][0]=5.500000
it=1,k=1,Node 2,buffer[1][0]=6.000000
it=2,k=0,Node 2,buffer[0][0]=5.500000
it=2,k=0,Node 2,buffer[1][0]=8.500000
it=2,k=0,Node 0,buffer[0][0]=5.500000
it=2,k=0,Node 0,buffer[1][0]=8.500000
it=2,k=0,Node 1,buffer[1][0]=8.500000
it=2,k=1,Node 1,buffer[0][0]=5.500000
it=2,k=1,Node 0,buffer[0][0]=5.500000
it=2,k=1,Node 0,buffer[1][0]=8.500000
it=2,k=1,Node 1,buffer[1][0]=8.500000
it=2,k=1,Node 2,buffer[0][0]=5.500000
it=2,k=1,Node 2,buffer[1][0]=8.500000
However,if I change the lines of /*swap pointer*/
into the following:
for (j=0; j<nz; j++){
for (i=0; i<nx; i++){
buffer[j][i] = p[j][i];
}
}
the code immediately gives the correct results:
it=0,k=0,Node 0,buffer[0][0]=1.500000
it=0,k=0,Node 0,buffer[1][0]=2.500000
it=0,k=0,Node 1,buffer[0][0]=1.500000
it=0,k=0,Node 1,buffer[1][0]=2.500000
it=0,k=0,Node 2,buffer[0][0]=1.500000
it=0,k=0,Node 2,buffer[1][0]=2.500000
it=0,k=1,Node 0,buffer[0][0]=2.500000
it=0,k=1,Node 0,buffer[1][0]=3.500000
it=0,k=1,Node 1,buffer[0][0]=2.500000
it=0,k=1,Node 1,buffer[1][0]=3.500000
it=0,k=1,Node 2,buffer[0][0]=2.500000
it=0,k=1,Node 2,buffer[1][0]=3.500000
it=1,k=0,Node 2,buffer[0][0]=3.000000
it=1,k=0,Node 0,buffer[0][0]=3.000000
it=1,k=0,Node 0,buffer[1][0]=5.000000
it=1,k=0,Node 1,buffer[0][0]=3.000000
it=1,k=0,Node 1,buffer[1][0]=5.000000
it=1,k=0,Node 2,buffer[1][0]=5.000000
it=1,k=1,Node 2,buffer[0][0]=5.000000
it=1,k=1,Node 0,buffer[0][0]=5.000000
it=1,k=1,Node 0,buffer[1][0]=7.000000
it=1,k=1,Node 1,buffer[0][0]=5.000000
it=1,k=1,Node 1,buffer[1][0]=7.000000
it=1,k=1,Node 2,buffer[1][0]=7.000000
it=2,k=0,Node 2,buffer[0][0]=4.500000
it=2,k=0,Node 2,buffer[1][0]=7.500000
it=2,k=0,Node 0,buffer[0][0]=4.500000
it=2,k=0,Node 0,buffer[1][0]=7.500000
it=2,k=0,Node 1,buffer[0][0]=4.500000
it=2,k=0,Node 1,buffer[1][0]=7.500000
it=2,k=1,Node 0,buffer[0][0]=7.500000
it=2,k=1,Node 1,buffer[0][0]=7.500000
it=2,k=1,Node 2,buffer[0][0]=7.500000
it=2,k=1,Node 2,buffer[1][0]=10.500000
it=2,k=1,Node 0,buffer[1][0]=10.500000
it=2,k=1,Node 1,buffer[1][0]=10.500000
My question is: Why did I just change the way to assign values can alter the correctness of the outputs?