I am trying to partition an n*n matrix row-wise among p processes, where n might not be divisible by p. So I need to partition it into blocks with different numbers of rows; the easiest way is to send n/p rows to each process except the last one, which takes n/p + n%p rows.
Here is my code:
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <mpi.h>

using namespace std;
int main(int argc, char* argv[])
{
int my_rank = 0;
int comm_size = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
double *Adata;
double **adjArray;
int n;
if (my_rank == 0){
n=6;
Adata = (double *)malloc(sizeof(double)*n*n);
adjArray = (double **)malloc(sizeof(double*) * n);
for(int i = 0; i < n; i++) {
adjArray[i] = &(Adata[i*n]);
}
int k=0;
for (int i=0; i<n; i++) {
for (int j=0; j<n; j++) {
adjArray[i][j]=k;
k++;
}
}
cout<<"---Adjacancy Matrix:"<<endl;
for (int i=0; i<n; i++) {
for (int j=0; j<n; j++) {
if(adjArray[i][j]==INT_MAX)
{
cout<< " - ";
}else
{
cout<< adjArray[i][j]<<" ";
}
}
cout<<endl;
}
cout<<"----------------------------------------------------"<<endl;
}
//---------------------------------------------------------
// Broadcasting the data among the processors.
MPI_Bcast( &n,1,MPI_INT,0,MPI_COMM_WORLD);
//---------------------------------------------------------
// Scatter the rows to each processor
int rem = 0; // elements remaining after division among processes
int sum = 0; // Sum of counts. Used to calculate displacements
if(my_rank==comm_size-1) rem=n%comm_size;
int *displs = (int *)malloc(comm_size*sizeof(int));
int *sendcounts = (int *)malloc(comm_size*sizeof(int));
int numPerProc=n/comm_size;
int receive_buffer[numPerProc+rem];
for (int i=0; i<comm_size-1; i++) {
sendcounts[i]=(n)/comm_size;
displs[i] = sum;
sum += sendcounts[i];
}
sendcounts[comm_size-1]=n/comm_size+rem;
displs[comm_size-1]=sum;
MPI_Datatype strip;
/* defining a datatype for sub-matrix */
MPI_Type_vector(numPerProc, n, n, MPI_DOUBLE, &strip);
MPI_Type_commit(&strip);
double **strip_A,*stripdata;
stripdata = (double *)malloc(sizeof(double)*numPerProc*n);
strip_A = (double **)malloc(sizeof(double*)*numPerProc);
for(int i= 0; i< numPerProc+rem; i++) {
strip_A[i] = &(stripdata[i*n]);
}
MPI_Scatterv(Adata, sendcounts, displs, strip, &(strip_A[0][0]), sendcounts[my_rank], strip, 0, MPI_COMM_WORLD);
for(int i = 0; i < sendcounts[my_rank]; i++) {
if(i == 0) {
printf("rank = %d\n", my_rank);
}
for(int j = 0; j < n; j++) {
if(strip_A[i][j]==INT_MAX)
{
cout<< " - ";
}else
{
cout<< strip_A[i][j]<<" ";
}
}
printf("\n");
}
MPI_Finalize();
return 0;
}
Unfortunately, it doesn't work when n is not equal to p. For example, when I try p=4, the output is:
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
---Adjacancy Matrix:
0 1 2 3 4 5
6 7 8 9 10 11
12 13 14 15 16 17
18 19 20 21 22 23
24 25 26 27 28 29
30 31 32 33 34 35
----------------------------------------------------
rank = 0
0 1 2 3 4 5
rank = 2
12 13 14 15 16 17
rank = 1
6 7 8 9 10 11
rank = 3
18 19 20 21 22 23
6.95287e-310 6.95287e-310 6.95287e-310 1.99804e+161 8.11662e+217 3.25585e-86
1.94101e-80 2.68185e-80 4.81827e+151 1.39957e-306 2.33584e-314 6.95287e-310
Any help is appreciated! Thank you!