
I am trying to partition an n*n matrix row-wise among p processors, where n might not be divisible by p. So the partitions have to be of different sizes; the easiest way is to send n/p rows to each processor except the last one, which takes n/p + n%p rows. For example, with n=6 and p=4, the first three processors would each get one row and the last one would get three.

Here is my code:

#include <mpi.h>
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <climits>

using namespace std;
int main(int argc, char* argv[])
{
    int my_rank = 0;
    int comm_size = 0;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

    double *Adata;
    double **adjArray;
    int n;


    if (my_rank == 0){
        n=6;
        Adata = (double *)malloc(sizeof(double)*n*n);
        adjArray = (double **)malloc(sizeof(double*) * n);

        for(int i = 0; i < n; i++) {
            adjArray[i] = &(Adata[i*n]);
        }

        int k=0;
        for (int i=0; i<n; i++) {
            for (int j=0; j<n; j++) {
                adjArray[i][j]=k;
                k++;
            }
        }

        cout<<"---Adjacancy Matrix:"<<endl;

        for (int i=0; i<n; i++) {
            for (int j=0; j<n; j++) {
                if(adjArray[i][j]==INT_MAX)
                {
                    cout<< " - ";
                }else
                {
                    cout<< adjArray[i][j]<<" ";
                }
            }
            cout<<endl;
        }
        cout<<"----------------------------------------------------"<<endl;
    }


    //---------------------------------------------------------
    // Broadcasting the data among the processors.

    MPI_Bcast( &n,1,MPI_INT,0,MPI_COMM_WORLD);

    //---------------------------------------------------------
    // Scatter the rows to each processor

    int rem = 0; // elements remaining after division among processes
    int sum = 0; // Sum of counts. Used to calculate displacements
    if(my_rank==comm_size-1) rem=n%comm_size;

    int *displs = (int *)malloc(comm_size*sizeof(int));
    int *sendcounts = (int *)malloc(comm_size*sizeof(int));
    int numPerProc=n/comm_size;
    int receive_buffer[numPerProc+rem];

    for (int i=0; i<comm_size-1; i++) {
        sendcounts[i]=(n)/comm_size;
        displs[i] = sum;
        sum += sendcounts[i];
    }
    sendcounts[comm_size-1]=n/comm_size+rem;
    displs[comm_size-1]=sum;

    MPI_Datatype strip;
    /* defining a datatype for sub-matrix */
    MPI_Type_vector(numPerProc, n, n, MPI_DOUBLE, &strip);
    MPI_Type_commit(&strip);

    double **strip_A,*stripdata;

    stripdata = (double *)malloc(sizeof(double)*numPerProc*n);
    strip_A = (double **)malloc(sizeof(double*)*numPerProc);
    for(int i= 0; i< numPerProc+rem; i++) {
        strip_A[i] = &(stripdata[i*n]);
    }

    MPI_Scatterv(Adata, sendcounts, displs, strip, &(strip_A[0][0]), sendcounts[my_rank], strip, 0, MPI_COMM_WORLD);


    for(int i = 0; i < sendcounts[my_rank]; i++) {
        if(i == 0) {
            printf("rank = %d\n", my_rank);
        }
        for(int j = 0; j < n; j++) {

            if(strip_A[i][j]==INT_MAX)
            {
                cout<< " - ";
            }else
            {
                cout<< strip_A[i][j]<<" ";
            }
        }
        printf("\n");
    }

    MPI_Finalize();

    return 0;
}

Unfortunately, it doesn't work once n is not equal to p. For example, when I try p=4, the output is:

[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
---Adjacancy Matrix:
0 1 2 3 4 5 
6 7 8 9 10 11 
12 13 14 15 16 17 
18 19 20 21 22 23 
24 25 26 27 28 29 
30 31 32 33 34 35 
----------------------------------------------------
rank = 0
0 1 2 3 4 5 
rank = 2
12 13 14 15 16 17 
rank = 1
6 7 8 9 10 11 
rank = 3
18 19 20 21 22 23 
6.95287e-310 6.95287e-310 6.95287e-310 1.99804e+161 8.11662e+217 3.25585e-86 
1.94101e-80 2.68185e-80 4.81827e+151 1.39957e-306 2.33584e-314 6.95287e-310 

Any help is appreciated! Thank you!

Sarah

1 Answer


The derived datatype for one row should be built like this (note the count is 1, not numPerProc):

MPI_Type_vector(1, n, n, MPI_DOUBLE, &strip);

Note that a simpler option is:

MPI_Type_contiguous(n, MPI_DOUBLE, &strip);

There are other issues as well:

  • sendcounts and displs are only significant at the root for MPI_Scatterv, and sendcounts[comm_size-1] is incorrect on rank 0 because rem is only set on the last rank
  • stripdata and strip_A have the wrong size on the last rank: numPerProc rows are allocated, but numPerProc+rem rows are accessed.
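Putting these fixes together, a minimal sketch of the corrected scatter could look like the following. This is not the original program: the row datatype (here called row_type) describes a single row, sendcounts and displs are computed identically on every rank so that sendcounts[my_rank] can be reused as the receive count, and the receive buffer is sized from sendcounts[my_rank].

#include <mpi.h>
#include <cstdio>
#include <cstdlib>

int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);

    int my_rank, comm_size;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

    int n = 0;
    double *Adata = NULL;

    if (my_rank == 0) {
        n = 6;
        Adata = (double *)malloc(sizeof(double) * n * n);
        for (int i = 0; i < n * n; i++) Adata[i] = i;  // same 0..n*n-1 fill as in the question
    }
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    // One derived datatype describes exactly one row of n doubles.
    MPI_Datatype row_type;
    MPI_Type_contiguous(n, MPI_DOUBLE, &row_type);
    MPI_Type_commit(&row_type);

    // Counts and displacements in units of rows, computed the same way on every rank.
    int *sendcounts = (int *)malloc(comm_size * sizeof(int));
    int *displs = (int *)malloc(comm_size * sizeof(int));
    int sum = 0;
    for (int i = 0; i < comm_size; i++) {
        sendcounts[i] = n / comm_size;
        if (i == comm_size - 1) sendcounts[i] += n % comm_size;  // last rank takes the remainder
        displs[i] = sum;
        sum += sendcounts[i];
    }

    // Allocate exactly the number of rows this rank will receive.
    int my_rows = sendcounts[my_rank];
    double *stripdata = (double *)malloc(sizeof(double) * my_rows * n);

    MPI_Scatterv(Adata, sendcounts, displs, row_type,
                 stripdata, my_rows, row_type,
                 0, MPI_COMM_WORLD);

    for (int i = 0; i < my_rows; i++) {
        printf("rank %d, row %d starts with %g\n", my_rank, i, stripdata[i * n]);
    }

    MPI_Type_free(&row_type);
    free(sendcounts);
    free(displs);
    free(stripdata);
    if (my_rank == 0) free(Adata);

    MPI_Finalize();
    return 0;
}

With n=6 and p=4, this sends one row each to ranks 0, 1 and 2, and three rows to rank 3.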
Gilles Gouaillardet
  • Thank you for your response. It is still not working :( Why do you say the count should be 1? I want to send more than one row to each processor. – Sarah Mar 29 '18 at 15:51
  • I edited my answer: the datatype should describe one row, and the number of rows to be sent goes in `sendcounts`. – Gilles Gouaillardet Mar 30 '18 at 01:11