
I have the following code:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

void pgmsize (char *filename, int *nx, int *ny);
void pgmread (char *filename, void *vx, int nx, int ny);
void pgmwrite(char *filename, void *vx, int nx, int ny);

#define FILENAME "edge768x768.pgm"
#define M 768
#define N 768

#define P 2
#define DEL 1
#define CHECKFREQ 500

#define MAXITER   5000

int main(int argc, char **argv){

    MPI_Init(&argc, &argv);
    int rank,size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    FILE *fp;
    char outname[64];
    sprintf(outname,"out_%d.dat",P);
    fp = fopen( outname, "w" );
    fprintf(fp,"del,iter,avg\n");

    MPI_Status status;
    MPI_Request req;
    MPI_Comm cart_comm;

    int dims[2] = {0,0};
    int periods[2] = {0,0};
    int coords[2];
    MPI_Dims_create(P, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD,2,dims,periods,1, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    int Px = dims[0];
    int Py = dims[1];

    int i,j,locali,localj,iter;
    printf("%d,%d,%d\n",rank,M/Px,N/Py);
    double masterarray[M][N];
    double outarray[M][N];
    double local_array[M/Px][N/Py];
    double local_padded_array[M/Px+2][N/Py+2];
    double old[M/Px+2][N/Py+2];

    printf("%d,%d,%d\n",rank,Px,Py);



    fclose(fp);
    MPI_Finalize();
}

When compiled and run, this gives the following error:

mpiexec noticed that process rank 0 with PID 28696 on node My-MacBook-Air exited on signal 11 (Segmentation fault: 11).

However, if I change the declaration of the master array to:

float masterarray[M][N]

it compiles and runs fine. It also runs fine if all of the arrays are declared as float. Can anyone help? Perhaps a better understanding of the difference between the two types would help me see what is going on.
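For what it's worth, the two types differ in size on every platform I have used (typically float is 4 bytes and double is 8), so each double array is twice as large as its float counterpart. A quick check I added just prints the totals:

    printf("float  array: %zu bytes\n", sizeof(float[M][N]));   /* 768*768*4 bytes, ~2.25 MiB */
    printf("double array: %zu bytes\n", sizeof(double[M][N]));  /* 768*768*8 bytes, ~4.5 MiB  */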


Having continued to write my code using all float arrays, I have now run into another issue relating to the arrays. My code is now:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

void pgmsize (char *filename, int *nx, int *ny);
void pgmread (char *filename, void *vx, int nx, int ny);
void pgmwrite(char *filename, void *vx, int nx, int ny);

#define FILENAME "edge768x768.pgm"
#define M 768
#define N 768

#define P 2
#define DEL 1
#define CHECKFREQ 500

#define MAXITER   5000

int main(int argc, char **argv){

    MPI_Init(&argc, &argv);
    int rank,size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    FILE *fp;
    char outname[64];
    sprintf(outname,"out_%d.dat",P);
    fp = fopen( outname, "w" );
    fprintf(fp,"del,iter,avg\n");

    MPI_Status status;
    MPI_Request req;
    MPI_Comm cart_comm;

    int dims[2] = {0,0};
    int periods[2] = {0,0};
    int coords[2];
    MPI_Dims_create(P, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD,2,dims,periods,1, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    int Px = dims[0];
    int Py = dims[1];

    int i,j,locali,localj,iter;
    float masterarray[M][N];
    float outarray[M][N];
    float local_array[M/Px][N/Py];
    float local_padded_array[M/Px+2][N/Py+2];
    float old[M/Px+2][N/Py+2];
    float new[M/Px+2][N/Py+2];



    if (rank == 0){
        pgmread(FILENAME, masterarray, M, N);
    }

    MPI_Bcast(masterarray,M*N,MPI_FLOAT,0,MPI_COMM_WORLD);

    for(i=0;i<M/Px;i++){
        for(j=0;j<N/Py;j++){

          locali = i + coords[0] * M/Px;
          localj = j + coords[1] * N/Py;


          local_array[i][j] = masterarray[locali][localj];

        }
    }

    for (i = 0;i<M/Px +2;i++){
        for (j = 0;j<N/Py +2;j++){
            local_padded_array[i][j] = 255.0;
        }
    }

    for (i = 1;i<M/Px +1;i++){
        for (j = 1;j<N/Py +1;j++){
            local_padded_array[i][j] = local_array[i-1][j-1];
        }
    }

    for (i = 0;i<M/Px +2;i++){
        for (j = 0;j<N/Py +2;j++){
            old[i][j] = 255.0;
        }
    }

    int down_rank,up_rank,right_rank,left_rank;
    MPI_Cart_shift(cart_comm,0,1,&right_rank,&left_rank);
    MPI_Cart_shift(cart_comm,1,1,&down_rank,&up_rank);

    MPI_Datatype col;
    MPI_Type_vector(M/Px,1,N/Py+2,MPI_FLOAT,&col);
    MPI_Type_commit(&col);

    float globaldel = 1000.0;
    float globalsum = 0.0;
    double time1 = MPI_Wtime();
    for(iter = 0;iter < MAXITER;iter++){

        MPI_Issend(&old[1][N/Py], 1,col,up_rank, 0,cart_comm, &req);
        MPI_Recv(&old[1][0], 1,col, down_rank, 0,cart_comm, &status);

        MPI_Issend(&old[1][1], 1,col ,down_rank, 0,cart_comm, &req);
        MPI_Recv(&old[1][N/Py+1], 1,col ,up_rank, 0,cart_comm, &status);

        MPI_Issend(&old[M/Px][1], N/Py,MPI_FLOAT,left_rank, 0,cart_comm, &req);
        MPI_Recv(&old[0][1], N/Py,MPI_FLOAT, right_rank, 0,cart_comm, &status);

        MPI_Issend(&old[1][1], N/Py,MPI_FLOAT,right_rank, 0,cart_comm, &req);
        MPI_Recv(&old[M/Px+1][1], N/Py,MPI_FLOAT, left_rank, 0,cart_comm, &status);

        for (i = 1;i<M/Px +1;i++){
        for (j = 1;j<N/Py +1;j++){
                new[i][j] = 0.25*(old[i][j-1]+old[i][j+1]+old[i-1][j]+old[i+1][j] - local_padded_array[i][j]);
            }
         }
    }

    printf("%d,%d,%d\n",rank,M/Px,N/Py);
    fclose(fp);
    MPI_Finalize();
}

which, when run, gives another segmentation fault that seems to go away if, in the final loop, I do not assign to the new array. So it seems there is some major issue in how my arrays are being created! The code also runs fine if #define P 3 is used rather than #define P 2.


1 Answer


You're probably running out of memory on the stack. If you're not familiar with the difference between the stack and the heap, check out: What and where are the stack and heap?. When I changed your code to allocate the arrays on the heap instead, it ran fine.
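To put rough numbers on it (assuming an 8-byte double, a 4-byte float, and the usual 8 MiB default main-thread stack on macOS):

    double masterarray[768][768]  ->  768*768*8 bytes  ~ 4.5 MiB
    double outarray[768][768]     ->  768*768*8 bytes  ~ 4.5 MiB

That is roughly 9 MiB before the per-rank tiles are even counted, so the double version dies immediately. The all-float version is half that and sits just under the limit, which is why it squeaks by until you declare one more array (new), and why shrinking the tiles with P 3 makes everything fit again. An array you never write to may never actually touch its stack pages, which would explain why skipping the assignment to new also hides the crash.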

To allocate memory from the heap for your arrays, use:

    double *masterarray = malloc(sizeof(double) * M * N);
    double *outarray = malloc(sizeof(double) * M * N);
    double *local_array = malloc(sizeof(double) * M/Px * N/Py);
    double *local_padded_array = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));
    double *old = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));

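If you would rather keep the two-dimensional arr[i][j] indexing that the rest of your code uses, you can also malloc through a pointer to an array (C99, the same feature your VLAs already rely on). A sketch of that alternative, reusing your existing names:

    /* heap allocation, but indexed exactly like the automatic (stack) versions were */
    double (*masterarray)[N] = malloc(sizeof(double[M][N]));
    double (*local_padded_array)[N/Py+2] = malloc(sizeof(double[M/Px+2][N/Py+2]));

    masterarray[0][0] = 255.0;
    local_padded_array[1][1] = masterarray[0][0];

    free(local_padded_array);
    free(masterarray);

Either way the storage comes from the heap, so the roughly 8 MiB stack limit stops being the constraint.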
Here's my version of your original code that runs fine (some extra junk removed to avoid creating files and whatnot):

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

#define M 768
#define N 768

#define P 2
#define DEL 1
#define CHECKFREQ 500

#define MAXITER   5000

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank,size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Status status;
    MPI_Request req;
    MPI_Comm cart_comm;

    int dims[2] = {0,0};
    int periods[2] = {0,0};
    int coords[2];
    MPI_Dims_create(P, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD,2,dims,periods,1, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);
    int Px = dims[0];
    int Py = dims[1];

    int i,j,locali,localj,iter;
    printf("%d,%d,%d\n",rank,M/Px,N/Py);

    double *masterarray = malloc(sizeof(double) * M * N);
    double *outarray = malloc(sizeof(double) * M * N);
    double *local_array = malloc(sizeof(double) * M/Px * N/Py);
    double *local_padded_array = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));
    double *old = malloc(sizeof(double) * (M/Px+2) * (N/Py+2));

    if (masterarray == NULL) fprintf(stderr, "MASTERARRAY == NULL");
    if (outarray == NULL) fprintf(stderr, "OUTARRAY == NULL");
    if (local_array == NULL) fprintf(stderr, "LOCAL_ARRAY == NULL");
    if (local_padded_array == NULL) fprintf(stderr, "LOCAL_PADDED_ARRAY == NULL");
    if (old == NULL) fprintf(stderr, "OLD == NULL");

    printf("%d,%d,%d\n",rank,Px,Py);

    MPI_Finalize();

    return 0;
}
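One thing the quick version above skips (treat it as an assumption about how you would finish the real program): release the buffers before MPI_Finalize once you are done with them:

    free(old);
    free(local_padded_array);
    free(local_array);
    free(outarray);
    free(masterarray);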
  • If I were to define Px and Py at the top of the file, then all the arrays would be of known size at compile time, and I could declare the arrays the way I have? If I do that it works fine, as long as I use more than 2 procs... – Josh Greenhalgh May 06 '15 at 15:34
  • That's true. As long as all of your values are declared as macros. You still might run out of memory though depending on how many processes you use on a single node. – Wesley Bland May 06 '15 at 15:35
  • So I am running on my Mac, which has 4 cores... it will not work for Px=1,Py=2 but will work for Px=1,Py=3 and Px=2,Py=2 – Josh Greenhalgh May 06 '15 at 15:37
  • "*You can't allocate dynamic arrays in C in the way you're doing.*" true for C89. Since C99 VLAs are around and the code shown is perfectly valid C. – alk May 06 '15 at 15:39
  • Bigger `Px` and `Py` values mean you allocate less memory because those are divisors. If you're really up against the wall like that, what's your aversion to using the heap? Speed probably isn't an issue anymore, especially if you're running at small scale. – Wesley Bland May 06 '15 at 15:39
  • Fair enough @alk, I barely use code written with C99 so I haven't seen people widely using that. I've updated the answer. – Wesley Bland May 06 '15 at 15:40
  • 1
    To confirm Wes's hunch about stack usage, gcc tells us this, if you add the -Wstack-usage flag: "code_bigstack.c:128:1: warning: stack usage might be unbounded [-Wstack-usage=]" } – Rob Latham May 06 '15 at 20:31
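  • (For completeness, that flag takes a byte limit; an invocation along these lines, with the exact compiler wrapper name being an assumption, reports any frame that might exceed 8 MiB. VLAs make the usage unbounded as far as gcc can tell, hence the wording of the warning.)

        mpicc -Wstack-usage=8388608 -c code_bigstack.c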