Glad to be here. I'm a badly stuck newbie seeking help.
Using MPI, I'm trying to distribute an arbitrarily sized 2D array to an arbitrary number of processes, as evenly as possible. For example, suppose we have a 10x10 array and 9 processes. What I'm trying to achieve is the following:
________________________
| | | |
| P0 | P1 | P2 |
| 4x4 | 4x3 | 4x3 |
|________|______|______|
| P3 | P4 | P5 |
| 3x4 | 3x3 | 3x3 |
|________|______|______|
| P6 | P7 | P8 |
| 3x4 | 3x3 | 3x3 |
|________|______|______|
So far I have managed to create the appropriate process grid and calculate the correct displacements into the global array (the first element of each subarray), but I fail miserably when it comes to sending said subarrays.
I've tried using both MPI_Type_vector and MPI_Type_create_subarray, but I must be missing something important because I can't make MPI_Scatterv work.
This is my code so far, keep in mind i need some nasty nested loops for the 2D arrays operations:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
// This will be command-line arguments
#define COLS 10
#define ROWS 10
int main(int argc, char **argv) {
int p, rank, i, j, proc;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// Let MPI decide the topology
int dims[2] = {0,0};
MPI_Dims_create(p, 2, dims);
int periods[2] = {0,0}; // non-periodic topology
int my_coords[2];
MPI_Comm comm_2D;
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &comm_2D);
MPI_Cart_coords(comm_2D, rank, 2, my_coords);
// Prepare the arrays and necessary info
char global_matrix[ROWS*COLS];
const int NPROWS = dims[0]; // Number of 'block' rows
const int NPCOLS = dims[1]; // Number of 'block' cols
int *num_rows; // Array containig the number of rows for each i-th process
int *num_cols; // As before
num_rows = (int *) malloc(p * sizeof(int));
num_cols = (int *) malloc(p * sizeof(int));
if (rank == 0) {
// Fill global matrix
for (i=0; i<ROWS*COLS; i++) {
global_matrix[i] = (char)i;
}
// Calculate the number of rows/cols for each process
for (i=0; i<p; i++) {
num_rows[i] = ROWS/NPROWS;
num_cols[i] = COLS/NPCOLS;
}
for (i=0; i<(ROWS%NPROWS); i++) {
for (j=0; j<NPCOLS; j++) {
num_rows[i*NPCOLS+j]++;
}
}
for (i=0; i<(COLS%NPCOLS); i++) {
for (j=0; j<NPROWS; j++) {
num_cols[i+NPROWS*j]++;
}
}
}
// Inform the processes about his local matrix size
MPI_Bcast(num_rows, p, MPI_INT, 0, comm_2D);
MPI_Bcast(num_cols, p, MPI_INT, 0, comm_2D);
// Define and initialize each local matrix
char local_matrix[num_rows[rank]*num_cols[rank]];
for (i=0; i<num_rows[rank]*num_cols[rank]; i++) {
local_matrix[i] = 0;
}
// Preparing for the Scatterv. Calculate displacements and number
// of elements to send to each process
int *disps = NULL;
int *counts = NULL;
if (rank == 0) {
disps = (int *) malloc(p * sizeof(int));
counts = (int *) malloc(p * sizeof(int));
for (i=0; i<NPROWS; i++) {
for (j=0; j<NPCOLS; j++) {
if (j == 0) {
// First block of the 'blockrow'
disps[i*NPCOLS+j] = i*COLS*num_rows[i*NPCOLS+j] + j*num_cols[i*NPCOLS+j];
} else {
// Rest of the blocks
disps[i*NPCOLS+j] = disps[i*NPCOLS+j - 1] + num_cols[i*NPCOLS+j - 1];
}
// This is VERY important and im not sure of it.
counts[i*NPCOLS+j] = 1; // 1 element to each process??
}
}
}
// Preparing the Datatypes for the Scatterv operation
MPI_Datatype tmp_matrix_t, global_matrix_t, local_matrix_t;
MPI_Type_vector(ROWS, 1, COLS, MPI_CHAR, &tmp_matrix_t);
MPI_Type_create_resized(tmp_matrix_t, 0, sizeof(char), &global_matrix_t);
MPI_Type_commit(&global_matrix_t);
MPI_Type_free(&tmp_matrix_t);
MPI_Type_vector(num_rows[rank], 1, num_cols[rank], MPI_CHAR, &tmp_matrix_t);
MPI_Type_create_resized(tmp_matrix_t, 0, sizeof(char), &local_matrix_t);
MPI_Type_commit(&local_matrix_t);
MPI_Type_free(&tmp_matrix_t);
// Doesn't work as expected
MPI_Scatterv(global_matrix, counts, disps, global_matrix_t,
local_matrix, 1, local_matrix_t, // receiving 1 element??
0, comm_2D);
// Testing/printing results
MPI_Barrier(comm_2D);
for (proc=0; proc<p; proc++) {
if (proc == rank) {
if (rank == 0) {
printf("Global matrix:\n");
for (i=0; i<ROWS; i++) {
printf("G: ");
for (j=0; j<COLS; j++) {
printf("%3d ", (int)global_matrix[i*COLS+j]);
}
printf("\n");
}
}
printf("Local matrix P%d:\n", rank);
for (i=0; i<num_rows[rank]; i++) {
printf("L%d: ", rank);
for (j=0; j<num_cols[rank]; j++) {
printf("%3d ", (int)local_matrix[i*num_cols[rank]+j]);
}
printf("\n");
}
}
}
MPI_Finalize();
return 0;
}
-- EDIT:
Going through the code of this example https://stackoverflow.com/a/7587133/4573730 by @Jonathan Dursi again, I'm starting to think that you cannot get the results I want using just MPI_Scatterv
.
Right now I think the best approach may be going back to point-to-point communication, or using MPI_Scatterv with four different communicators (as we have at most four different subarray sizes) so that each one can use same-sized send/receive buffers.
PS: I feel that my English is broken, lol — my apologies.