0

I am working on a project where we import a 32x32 or 64x64 PBM image file. Sample is provided below:

P1
# CREATOR: GIMP PNM Filter Version 1.1
32 32
01111110000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000001000000000000
00000000000001100000000000000000
00001100000000100011100000000000
00001100000000000000000000000000
00000011000000000000000000000000
00000011000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000011100000
01000101001000000000000000000000
01000101010000011100000000000000
01000101100000111000000000000000
01000101010000000000000000000000
01000101001000000000000000000000
00111001001000000000000001000000
00000000000000000000000001000000
00000000000000000000000001000000
00000000000000000000000000000000
00000000010000000000000000000000
00000000001000000000000000000000
00000000111000000000000000000000
00000000000000000000000110000000
00000000000000000000001001000000
00110000000000000000000110000000
01100000000000000110000000001000
00100000000000000110000000001100
00000000000000011000000000001100
00000000000000011000000000000100
00000000000000000000000000000000

This is read into a 1D array and processed sequentially. I am attempting to break the 1D array into multiple arrays in a 4x4 pattern. Once broken into "chunks" the temporary smaller arrays will be distributed to multiple processing elements. After each smaller chunk has been processed the "chunks" will be sent back to the primary node to be reassembled.

This is the complete code which is working in a sequential manner:

/*  life.c

    Play Conway's Game of Life

    Reads a P1 file in, outputs a P1 file.

    Does as many timesteps as stated on the command line.

    Uses a classic 1-element fake zone internally.

    Jan. 30, 2013 by H. Dietz
*/

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>


unsigned char *
read_P1(int *xdim, int *ydim)
{
    register int c, x, y;
    register unsigned char *p;

    if ((c = getchar()) != 'P') {
pbm_bad:
        fprintf(stderr, "Bad PBM input\n");
        exit(1);
    }
    if ((c = getchar()) != '1') goto pbm_bad;
    c = getchar();

#define Eat_Space \
    while ((c == ' ') || \
           (c == '\t') || \
           (c == '\n') || \
           (c == '\r') || \
           (c == '#')) { \
        if (c == '#') while ((c = getchar()) != '\n') ; \
        c = getchar(); \
    }

    Eat_Space;      /* Eat white space and comments */

#define Get_Number(n) \
    { \
        if ((c < '0') || (c > '9')) goto pbm_bad; \
 \
        n = (c - '0'); \
        c = getchar(); \
        while ((c >= '0') && (c <= '9')) { \
            n *= 10; \
            n += (c - '0'); \
            c = getchar(); \
        } \
    }

    Get_Number(*xdim);  /* Get image width */

    Eat_Space;      /* Eat white space and comments */
    Get_Number(*ydim);  /* Get image height */

    p = ((unsigned char *)
         calloc(((*xdim + 2) * (*ydim + 2)),
            sizeof(unsigned char)));
    if (p == 0) goto pbm_bad;

    Eat_Space;

    for (y=0; y<*ydim; ++y) {
        for (x=0; x<*xdim; ++x) {
            Eat_Space;
            switch (c) {
            case '1':   p[x+1+((y+1)*(*xdim+2))] = 1; break;
            case '0':   /* 0 from calloc() */ break;
            default:    goto pbm_bad;
            }
            c = getchar();
        }
    }

    return(p);
}

#undef  Eat_Space
#undef  Get_Number


write_P1(register int xdim,
register int ydim,
register unsigned char *p)
{
    register int x, y, pos = 0;

    printf("P1\n"
           "# CREATOR: P1IO\n"
           "%d %d\n",
           xdim,
           ydim);

    for (y=0; y<ydim; ++y) {
        for (x=0; x<xdim; ++x) {
            int c = "01"[ p[x+1+((y+1)*(xdim+2))] != 0 ];
            putchar(c);

            /* Keep lines even matrices */
            if ( (++pos)%xdim == 0) {
                putchar('\n');
            }
        }
    }
    putchar('\n');
}


int main(int argc, char **argv)
{
    if(MPI_Init(&argc, &argv) != MPI_SUCCESS){
        exit(1);
    }

    int xdim, ydim;
    int iproc, nproc;
    register unsigned char *p, *q, *masterP;
    register int x, y, i, j, t, divisions, timesteps;

    divisions = 4;

    MPI_Comm_size(MPI_COMM_WORLD, &nproc); //nproc = number of PE
    MPI_Comm_rank(MPI_COMM_WORLD, &iproc); //iproc = position out of nproc

    if (argc != 2) {
        fprintf(stderr, "Usage: %s timesteps\n", argv[0]);
        exit(2);
    }
    timesteps = atoi(argv[1]);


    /* Carve p into 4x4 and send to 15 processing elements. PE0 will
    handle one chunk on its own */
    //if(iproc == 0){
        int masterx, mastery;
        /* Read the initial state image */
        masterP = read_P1(&masterx, &mastery);
        xdim = (masterx/divisions);
        ydim = (mastery/divisions);


        /* Make a temporary array to send each chunk of the image */
        p = ((unsigned char *)calloc((((xdim)+2) * ((ydim)+2)), 
            sizeof(unsigned char)));
        if (p == 0) {
        fprintf(stderr, "Calloc failed\n");
        exit(3);
        }

        printf("Check master P\n");
        write_P1(masterx, mastery, masterP);


        int loopx , loopy;
        int cnt = 0;
        for(loopy = 0; loopy < divisions; ++loopy){
            for(loopx = 0; loopx < divisions; ++loopx){
                x=1;
                //Fill p
                for (j=(loopy*ydim)+1; j<((loopy+1)*ydim+2); ++j){
                    for( i=(loopx*xdim)+1; i<((loopx+1)*xdim+2); ++i){
                        //printf("j: %d i: %d\n",j,i);
                        p[x]=masterP[i+(loopy*j)+1];
                        printf("%d",p[x]);
                        x++;
                    }
                }
                //Print P contents
                printf("\nIteration Y:%d X:%d\n",loopy, loopx);
                write_P1(xdim, ydim, p);

            }
        }


        //MPI_Send(p, sizeof(temp), MPI_UNSIGNED_CHAR, pe_num, 0, MPI_COMM_WORLD);
        //++pe_num;

    //}/* End IProc 0's business */


    /* Make a target image (with blank fake zone) */
    q = ((unsigned char *)
         calloc(((xdim + 2) * (ydim + 2)),
            sizeof(unsigned char)));
    if (q == 0) {
        fprintf(stderr, "Calloc failed\n");
        exit(3);
    }

    /* Iterate over the timesteps */
    for (t=0; t<timesteps; ++t) {
#ifdef  VERBOSE
        fprintf(stderr, "Timestep %d...\n", t);
#endif

        for (y=0; y<ydim; ++y) {
            for (x=0; x<xdim; ++x) {

#define FAKE(P, X, Y) P[((X)+1)+(((Y)+1)*(xdim+2))]

                register int live = FAKE(p, x-1, y-1) +
                            FAKE(p, x, y-1) +
                            FAKE(p, x+1, y-1) +
                            FAKE(p, x-1, y) +
                            FAKE(p, x+1, y) +
                            FAKE(p, x-1, y+1) +
                            FAKE(p, x, y+1) +
                            FAKE(p, x+1, y+1);

                /* Apply Conway's rules...
                   (yes, I know this is clever code!)
                */
                FAKE(q, x, y) = ((live == 3) ||
                         ((live == 2) &&
                          FAKE(p, x, y)));
            }
        }

        /* Swap our notion of p and q...
           this saves us copying every timestep
        */
        {
            register unsigned char *pt = p;
            p = q;
            q = pt;
        }
    } /* End iterate timestep for block */

    /* Done; write-out results */
    write_P1(masterx, mastery, q);
    exit(0);
}

The following should break the large array masterP into 4x4 "chunks":

int loopx , loopy;
int cnt = 0;
for(loopy = 0; loopy < divisions; ++loopy){
    for(loopx = 0; loopx < divisions; ++loopx){
        x=1;
        //Fill p
        for (j=(loopy*ydim)+1; j<((loopy+1)*ydim+2); ++j){
            for( i=(loopx*xdim)+1; i<((loopx+1)*xdim+2); ++i){
                //printf("j: %d i: %d\n",j,i);
                p[x]=masterP[i+(loopy*j)+1];
                printf("%d",p[x]);
                x++;
            }
        }
        //Print P contents
        printf("\nIteration Y:%d X:%d\n",loopy, loopx);
        write_P1(xdim, ydim, p);

    }
}

However, my resulting arrays are all 0. Is it improper to say

  p[x]=masterP[i+(loopy*j)+1];

Array p is an array created to be a 4x4 "chunk" of masterP.

Sample output:

P1  
# CREATOR: GIMP PNM Filter Version 1.1
32 32
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000001000000000000
00000000000001100000000000000000
00001100000000100011100000000000
00001100000000000000000000000000
00000011000000000000000000000000
00000011000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000011100000
01000101001000000000000000000000
01000101010000011100000000000000
01000101100000111000000000000000
01000101010000000000000000000000
01000101001000000000000000000000
00111001001000000000000001000000
00000000000000000000000001000000
00000000000000000000000001000000
00000000000000000000000000000000
00000000010000000000000000000000
00000000001000000000000000000000
00000000111000000000000000000000
00000000000000000000000110000000
00000000000000000000001001000000
00110000000000000000000110000000
01100000000000000110000000001000
001000000000000001100000000000001100
00000000000000011000000000000100
00000000000000000000000000000000
Check master P
P1
# CREATOR: P1IO
32 32
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000001000000000000
00000000000001100000000000000000
00001100000000100011100000000000
00001100000000000000000000000000
00000011000000000000000000000000
00000011000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000011100000
01000101001000000000000000000000
01000101010000011100000000000000
01000101100000111000000000000000
01000101010000000000000000000000
01000101001000000000000000000000
00111001001000000000000001000000
00000000000000000000000001000000
00000000000000000000000001000000
00000000000000000000000000000000
00000000010000000000000000000000
00000000001000000000000000000000
00000000111000000000000000000000
00000000000000000000000110000000
00000000000000000000001001000000
00110000000000000000000110000000
01100000000000000110000000001000
00100000000000000110000000001100
00000000000000011000000000001100
00000000000000011000000000000100
00000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:0 X:0
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:0 X:1
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:0 X:2
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:0 X:3
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:1 X:0
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:1 X:1
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:1 X:2
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:1 X:3
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:2 X:0
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:2 X:1
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:2 X:2
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:2 X:3
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:3 X:0
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:3 X:1
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:3 X:2
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

000000000000000000000000000000000000000000000000000000000000000000000000000000000
Iteration Y:3 X:3
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000

P1
# CREATOR: P1IO
32 32
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000100000
11011100011111100111110000000000
11110001111110011111000111111000
00000000000101111100000000000111
00000000000000000010000000110111
01111110010000000100000001111110
00000100000000100000001101110000
00000000000000000000000000000000
00000000000000000001111100011111
01111100011111000111110001111100
11110000111100011111000111110001
11000111111001111100011111100111
00011111100111110001111110011111
01111110011111000111111001111100
11111001111100011111100111110001
11100111110001111110011111000111
10011111000111111001111100011111
01111100000000000111110001111110
11110001111110000000000000000100
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000

Thank you for your help!

Alex Hendren
  • 446
  • 1
  • 5
  • 18
  • There's a fairly lengthy discussion on the topic of scattering/gathering pieces of a 2d array in the answer here: http://stackoverflow.com/questions/9269399/sending-blocks-of-2d-array-in-c-using-mpi/9271753#9271753] – Jonathan Dursi Feb 11 '13 at 14:50

3 Answers3

3

You can do that in a much more elegant way using MPI derived datatypes. Take a look at MPI_Type_vector. It creates a derived datatype which instructs MPI to take count blocks of blocklength elements of the old datatype each with blocks being separated by stride elements:

block 1      block 2      block 3 ..... block count
   |            |            |             |
   V            V            V             V
+-----+------+-----+------+-----+-------+-----+------
|XXXXX|      |XXXXX|      |XXXXX|       |XXXXX|
+-----+------+-----+------+-----+-------+-----+------
|<- stride ->|            |     |
                        ->|-----|<-
                        blocklength

Now if you set count to 4, blocklength to 4, and stride to the length of one row, that's exactly a datatype that would take a 4x4 block out of the big array. You can use this datatype in send and receive operations and with some MPI type massage magic (specifically resizing the type with MPI_Type_create_resized) you can even use it in scatter/gather operations.

For example, to send the second block on the second 4-row, assuming that each row has row4cols 4-columns:

MPI_Datatype block4type;

MPI_Type_vector(4, 4, 4*rows4cols, MPI_UNSIGNED_CHAR, &block4type);
MPI_Type_commit(&block4type);

MPI_Send(&masterP[(2-1)*(4*4*rows4cols) + (2-1)*4], 1, block4type,
         dest_rank, 0, MPI_COMM_WORLD);

This would send a message that has 16 elements of type MPI_UNSIGNED_CHAR. The receiver can simply receive this into an 4x4 array:

unsigned char block[4][4];
MPI_Status status;

MPI_Recv(block, 4*4, MPI_UNSIGNED_CHAR,
         master_rank, 0, MPI_COMM_WORLD, &status);

This works in the other direction too - the worker sends 16 elements of MPI_UNSIGNED_CHAR and the master uses block4type to receive the data as a 4x4 block in the appropriate position in masterP.

Hristo Iliev
  • 72,659
  • 12
  • 135
  • 186
2

You never increment x. It looks like you intended to. So you are always assigning data to p[1]. It looks like instead of assigning to p[x], you really should assign to p[cnt].

paddy
  • 60,864
  • 6
  • 61
  • 103
  • Oops! It looks like I forgot to fully revise my code after some debugging exercises. Unfortunately, I am still having the same issue. I have edited my post to include the example output. Thank you for taking the time to notice my lack of attention to detail!! – Alex Hendren Feb 11 '13 at 02:50
1

Thank you to everyone for your support! Certainly helped push me in the right direction. I finally sorted through some issues deriving from my indices.

The 1d array should have 1024 elements for a 32x32 image, however our processing function sets up a "fake zone" or buffer around the 4x4 chunks of the 32x32 image. With fake zones the dimensions are (x+2)x(y+2), thus for 32x32 image I should expect 1156 elements in the 1d array. Previously I was not properly handling the wraparound and extra buffer space that the fake zones added.

I used 4 for loops to iterate through the larger 32x32 image to generate 16 8x8 chunks of the image. I believe this would have been much easier using 2d matrices, however our processing functions are setup to accept 1d arrays. There would have been more overhead in converting the processing functions to accept 2d arrays compared to deriving the correct indexing pattern for the 1d manipulation. Below provides the 4 for loops used:

    int loopx , loopy, pe_num;
int cnt = 0;

for(loopy = 0; loopy < divisions; loopy++){
    for(loopx = 0; loopx < divisions; loopx++){
        //Fill p
        cnt=(xdim+3); //Reinitialize the first index for every new p to xdim+2
        for (j=0; j<(ydim); j++){
            for( i=0; (i<xdim); i++){
                p[cnt]=masterP[((masterx+2)+(j*(masterx+2))+(loopy*(masterx+2)*ydim)+(loopx*xdim)+i+1)];
                //printf("%d",masterP[((masterx+2)+(j*(masterx+2))+(loopy*(masterx+2)*ydim)+(loopx*xdim)+i)]);  
                if( ((cnt+1)%(xdim+2))==0) //skip over fake zone
                    cnt = cnt+3;
                else
                    cnt++;
            }
        }
        pe_num = ((loopy*divisions)+loopx);
        //Print "chunk" p contents
        printf("\nPE #%d\n",pe_num);
        write_P1(xdim, ydim, p);
        //MPI_Send(p, sizeof(p), MPI_UNSIGNED_CHAR, pe_num, 0, MPI_COMM_WORLD);
    }
} 

For Input:

P1
# CREATOR: GIMP PNM Filter Version 1.1
32 32
01111111111110000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000001000000000000
00000000000001100000000000000000
00001100000000100011100000000000
00001100000000000000000000000000
00000011000000000000000000000000
00000011000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000011100000
01000101001000000000000000000000
01000101010000011100000000000000
01000101100000111000000000000000
01000101010000000000000000000000
01000101001000000000000000000000
00111001001000000000000001000000
00000000000000000000000001000000
00000000000000000000000001000000
00000000000000000000000000000000
00000000010000000000000000000000
00000000001000000000000000000000
00000000111000000000000000000000
00000000000000000000000110000000
00000000000000000000001001000000
00110000000000000000000110000000
01100000000000000110000000001000
00100000000000000110000000001100
00000000000000011000000000001100
00000000000000011000000000000100
00000000000000000000000000000000

I successfully break into 16 chunks forming a 4x4 grid of the original image:

Check master P
P1
# CREATOR: P1IO
32 32
01111111111110000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000001000000000000
00000000000001100000000000000000
00001100000000100011100000000000
00001100000000000000000000000000
00000011000000000000000000000000
00000011000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000011100000
01000101001000000000000000000000
01000101010000011100000000000000
01000101100000111000000000000000
01000101010000000000000000000000
01000101001000000000000000000000
00111001001000000000000001000000
00000000000000000000000001000000
00000000000000000000000001000000
00000000000000000000000000000000
00000000010000000000000000000000
00000000001000000000000000000000
00000000111000000000000000000000
00000000000000000000000110000000
00000000000000000000001001000000
00110000000000000000000110000000
01100000000000000110000000001000
00100000000000000110000000001100
00000000000000011000000000001100
00000000000000011000000000000100
00000000000000000000000000000000


PE #0
P1
# CREATOR: P1IO
8 8
01111111
00000000
00000000
00000000
00000000
00001100
00001100
00000011


PE #1
P1
# CREATOR: P1IO
8 8
11111000
00000000
00000000
00000000
00000110
00000010
00000000
00000000


PE #2
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00010000
00000000
00111000
00000000
00000000


PE #3
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000


PE #4
P1
# CREATOR: P1IO
8 8
00000011
00000000
00000000
00000000
01000101
01000101
01000101
01000101


PE #5
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00100000
01000001
00000011
01000000


PE #6
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
01000000
00000000
00000000


PE #7
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
01100000
00000000
00000000
00000000
00000000


PE #8
P1
# CREATOR: P1IO
8 8
01000101
00111001
00000000
00000000
00000000
00000000
00000000
00000000


PE #9
P1
# CREATOR: P1IO
8 8
00100000
00100000
00000000
00000000
00000000
01000000
00100000
01100000


PE #10
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000


PE #11
P1
# CREATOR: P1IO
8 8
00000000
01000000
01000000
01000000
00000000
00000000
00000000
00000000


PE #12
P1
# CREATOR: P1IO
8 8
00000000
00000000
00110000
01100000
00100000
00000000
00000000
00000000


PE #13
P1
# CREATOR: P1IO
8 8
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000


PE #14
P1
# CREATOR: P1IO
8 8
00000001
00000010
00000001
01100000
01100000
00000000
00000000
00000000


PE #15
P1
# CREATOR: P1IO
8 8
10000000
01000000
00000000
00001000
00001100
00001100
00000100
00000000

Thank you again to those who contributed. Hristo, I am very interested to try the recommendation you made, I believe that is a more efficient method!

Alex Hendren
  • 446
  • 1
  • 5
  • 18