-2

I have a C code snippet using MPI as follows:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
float **p=NULL, **buffer=NULL;
int it, nt=3, i, j, k, NP, MYID, nx=1, nz=2, nsrc=3, isrc;

MPI_Init ( &argc, &argv );
MPI_Comm_size ( MPI_COMM_WORLD, &NP );
MPI_Comm_rank ( MPI_COMM_WORLD, &MYID ); 

p = (float **)calloc(nz,sizeof(float *));
for (i=0;i<nz;i++) p[i] = (float *)calloc(nx,sizeof(float));
buffer = (float **)calloc(nz,sizeof(float *));
for (i=0;i<nz;i++) buffer[i] = (float *)calloc(nx,sizeof(float));

for (it=0; it<nt; it++){        
    for (isrc=MYID; isrc<nsrc; isrc+=NP){
        for (j=0; j<nz; j++){
            for (i=0; i<nx; i++){
                p[j][i] += 1.5 + (float)(isrc) + (float)(j);
            }
        }            
    }

    for (k=0;k<nsrc-1;k++){ 
        if (MYID==k){ 
            buffer = p;  /*swap pointer*/          
        }
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Bcast(&buffer[0][0],nx*nz,MPI_FLOAT,k,MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);
        for (j=0; j<nz; j++){
            for (i=0; i<nx; i++){
                printf("it=%d,k=%d,Node %d,buffer[%d][%d]=%f\n",it,k,MYID,j,i,buffer[j][i]);
            }
        }            
    }     
}

MPI_Finalize();
exit(0);
}

If you run it with 3 cores mpirun -np 3 ./main, it will give wrong results:

it=0,k=0,Node 0,buffer[0][0]=1.500000
it=0,k=0,Node 0,buffer[1][0]=2.500000
it=0,k=1,Node 0,buffer[0][0]=2.500000
it=0,k=1,Node 0,buffer[1][0]=3.500000
it=0,k=0,Node 1,buffer[0][0]=1.500000
it=0,k=0,Node 1,buffer[1][0]=2.500000
it=0,k=1,Node 1,buffer[0][0]=2.500000
it=0,k=1,Node 1,buffer[1][0]=3.500000
it=1,k=0,Node 1,buffer[0][0]=4.000000
it=1,k=0,Node 1,buffer[1][0]=6.000000
it=0,k=0,Node 2,buffer[0][0]=1.500000
it=0,k=0,Node 2,buffer[1][0]=2.500000
it=0,k=1,Node 2,buffer[0][0]=2.500000
it=0,k=1,Node 2,buffer[1][0]=3.500000
it=1,k=0,Node 2,buffer[0][0]=4.000000
it=1,k=0,Node 2,buffer[1][0]=6.000000
it=1,k=1,Node 2,buffer[0][0]=4.000000
it=1,k=0,Node 0,buffer[0][0]=4.000000
it=1,k=0,Node 0,buffer[1][0]=6.000000
it=1,k=1,Node 0,buffer[0][0]=4.000000
it=1,k=1,Node 0,buffer[1][0]=6.000000
it=1,k=1,Node 1,buffer[0][0]=4.000000
it=1,k=1,Node 1,buffer[1][0]=6.000000
it=2,k=0,Node 1,buffer[0][0]=5.500000
it=1,k=1,Node 2,buffer[1][0]=6.000000
it=2,k=0,Node 2,buffer[0][0]=5.500000
it=2,k=0,Node 2,buffer[1][0]=8.500000
it=2,k=0,Node 0,buffer[0][0]=5.500000
it=2,k=0,Node 0,buffer[1][0]=8.500000
it=2,k=0,Node 1,buffer[1][0]=8.500000
it=2,k=1,Node 1,buffer[0][0]=5.500000
it=2,k=1,Node 0,buffer[0][0]=5.500000
it=2,k=1,Node 0,buffer[1][0]=8.500000
it=2,k=1,Node 1,buffer[1][0]=8.500000
it=2,k=1,Node 2,buffer[0][0]=5.500000
it=2,k=1,Node 2,buffer[1][0]=8.500000

However,if I change the lines of /*swap pointer*/ into the following:

for (j=0; j<nz; j++){
     for (i=0; i<nx; i++){
          buffer[j][i] = p[j][i];  
     }
}  

the code immediately gives the correct results:

it=0,k=0,Node 0,buffer[0][0]=1.500000
it=0,k=0,Node 0,buffer[1][0]=2.500000
it=0,k=0,Node 1,buffer[0][0]=1.500000
it=0,k=0,Node 1,buffer[1][0]=2.500000
it=0,k=0,Node 2,buffer[0][0]=1.500000
it=0,k=0,Node 2,buffer[1][0]=2.500000
it=0,k=1,Node 0,buffer[0][0]=2.500000
it=0,k=1,Node 0,buffer[1][0]=3.500000
it=0,k=1,Node 1,buffer[0][0]=2.500000
it=0,k=1,Node 1,buffer[1][0]=3.500000
it=0,k=1,Node 2,buffer[0][0]=2.500000
it=0,k=1,Node 2,buffer[1][0]=3.500000
it=1,k=0,Node 2,buffer[0][0]=3.000000
it=1,k=0,Node 0,buffer[0][0]=3.000000
it=1,k=0,Node 0,buffer[1][0]=5.000000
it=1,k=0,Node 1,buffer[0][0]=3.000000
it=1,k=0,Node 1,buffer[1][0]=5.000000
it=1,k=0,Node 2,buffer[1][0]=5.000000
it=1,k=1,Node 2,buffer[0][0]=5.000000
it=1,k=1,Node 0,buffer[0][0]=5.000000
it=1,k=1,Node 0,buffer[1][0]=7.000000
it=1,k=1,Node 1,buffer[0][0]=5.000000
it=1,k=1,Node 1,buffer[1][0]=7.000000
it=1,k=1,Node 2,buffer[1][0]=7.000000
it=2,k=0,Node 2,buffer[0][0]=4.500000
it=2,k=0,Node 2,buffer[1][0]=7.500000
it=2,k=0,Node 0,buffer[0][0]=4.500000
it=2,k=0,Node 0,buffer[1][0]=7.500000
it=2,k=0,Node 1,buffer[0][0]=4.500000
it=2,k=0,Node 1,buffer[1][0]=7.500000
it=2,k=1,Node 0,buffer[0][0]=7.500000
it=2,k=1,Node 1,buffer[0][0]=7.500000
it=2,k=1,Node 2,buffer[0][0]=7.500000
it=2,k=1,Node 2,buffer[1][0]=10.500000
it=2,k=1,Node 0,buffer[1][0]=10.500000
it=2,k=1,Node 1,buffer[1][0]=10.500000

My question is: Why did I just change the way to assign values can alter the correctness of the outputs?

coco
  • 71
  • 6
  • 1
    [don't cast malloc](https://stackoverflow.com/questions/605845/do-i-cast-the-result-of-malloc) – Barmar Aug 09 '18 at 21:00
  • 1
    There's no need to use `calloc()` if you're immediately going to initialize all the array elements. It will initialize them all to zero, which is unnecessary since you're going to replace them. – Barmar Aug 09 '18 at 21:02
  • @WeatherVane why no sense? I used `buffer = p` at first but got a wrong output; but when assigning values one by one using a for loop gives the correct answer. Don't you think we have to understand why? And that's why I ask here to seek for your help. – coco Aug 09 '18 at 21:10
  • Because `i` has not been initialised. So comparing it is *undefined behaviour*. – Weather Vane Aug 09 '18 at 21:11
  • @WeatherVane Sorry, that's a typo. I modified i=0 – coco Aug 09 '18 at 21:12
  • Now you have changed the question. Please post the [Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve) that shows the problem. Then you don't suffer from "typo" problems or "something like my code" which waste everyones time. – Weather Vane Aug 09 '18 at 21:14
  • @Barmar But initialization is a good habit, isn't it? – coco Aug 10 '18 at 02:35
  • Doing things without thinking is a bad habit. I see lots of code like `char *foo = NULL;` then a few lines later `foo = callSomeFunction();`. The initialization suggests that the variable could be used before being assigned, but it was just done out of habit. – Barmar Aug 10 '18 at 02:40
  • On the other hand, if you have a choice between unnecessary initialization and forgetting to initialize something that needs it, the first is preferable, since it won't lead to erroneous behavior. – Barmar Aug 10 '18 at 02:41
  • Note, though, that `calloc()` doesn't guarantee to create null pointers. It fills the memory with zero bit patterns, which isn't guaranteed to be how null pointers are represented (although it happens to be true on most common architectures). – Barmar Aug 10 '18 at 02:43

2 Answers2

1

I think your question boils down to, why is

buffer = p;

different to

for (j=0; j<nz; j++){
     for (i=0; i<nx; i++){
          buffer[j][i] = p[j][i];  
     }
}   

buffer = p is a shallow copy, while the for loop is a deep copy. In a shallow copy, we are changing where buffer stores all it's elements. In a deep copy, we preserve where buffer stores all it's elements, and then copy all of p's elements into here.

The reason the behaviour is unexpected is because in the shallow copy case, where buffer and p store all their elements overlaps so the broadcasts and assignments are writing to the same memory.

  • But in the second `it` loop, `p` values also change, why `buffer` still keep the original values? – coco Aug 09 '18 at 22:01
1

First, you need to allocate your (2D) arrays in contiguous memory.

Otherwise you only broadcast the first row and cause buffer overflow that lead to undefined behavior.

Be aware you already posted a bunch of questions and the root cause is always the same : you need to allocate your arrays in contiguous memory (and there are tons of example on SO).

Gilles Gouaillardet
  • 8,193
  • 11
  • 24
  • 30