0

I am running multiplication matrix code on my cluster and i am certain it works just fine. i even used the Scatterv and it scatters flawlessly... but i dynamically allocated my result matrix on every Process with size sendcount[world_rank] rows and z columns, i checked and these numbers are correct to the dot... only one Process allocated the array and the rest just prints an address or something.

My code does the matrix multiplication here:

count = 0;
while(count < z){

    for (i = 0; i < sendcount[world_rank]; i++){

        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);

        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);

    }
    count++;
    //printf("\n");
}

When i go to print the resultmatrix at each process for testing, only my root process prints actual values... could this be a problem from my cluster or am i missing something?

Here is my whole code:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[]){

int world_size, world_rank, i, j, tag = 777;
int root = 0;
int count = 0;

int x = atoi(argv[1]);
int y = atoi(argv[2]);
int z = atoi(argv[3]);


MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Status status;
int temp1_size = x*y;
int temp2_size = y*z;
//scatter v info
int sum = 0;
int rem1 = x%world_size;
int** matrix1;
int** matrix2;
int** resultmatrix;
int* tempM1;
int* tempM2;
int* recM1;
int* sendcount = (int*)malloc(sizeof(int) * world_size);
int* displs = (int*)malloc(sizeof(int) * world_size);

for(i = 0; i < world_size; i++){

    sendcount[i] = (x/world_size)*y;
    if(rem1 > 0){
        sendcount[i] += y;
        rem1--;
    }
    displs[i] = sum;
    sum += sendcount[i];
}
//to send my arrays over... first by bcast other by scatter...
tempM1 = (int*)malloc(sizeof(int) * temp1_size);
tempM2 = (int*)malloc(sizeof(int) * temp2_size);
// first is for 1d other is for matrix1;
//the array that will hold out scattered matrix1... 1d array...
recM1 = (int*)malloc(sizeof(int) * 500);
// matrix one at every process after scatter...




if (world_rank == root){

    FILE* Matrix1;
    FILE* Matrix2;

    Matrix1 = fopen("Matrix1.txt", "r");
    Matrix2 = fopen("Matrix2.txt", "r");
    int count = 0;
    while (count < temp1_size){
        fscanf(Matrix1, "%d", &tempM1[count]);
        count++;
    }
    //printf("count: %d, temp1_size: %d\n", count, temp1_size);
    count = 0;
    while(count < temp2_size){
        fscanf(Matrix2, "%d", &tempM2[count]);
        count++;
    }
    fclose(Matrix1);
    fclose(Matrix2);

    /*for(i = 0; i < world_size; i++)
        printf("sendcount: %d, displs: %d\n", sendcount[i], displs[i]);*/

}
MPI_Bcast(tempM2, temp2_size, MPI_INT, root, MPI_COMM_WORLD);
MPI_Scatterv(tempM1, sendcount, displs, MPI_INT, recM1, 500, MPI_INT, root, MPI_COMM_WORLD);

for(i = 0; i < world_size; i++){
    sendcount[i] /= y;
    //printf("%d at i: %d\n", sendcount[i], i);
}

matrix1 = (int**)malloc(sizeof(int*) * sendcount[world_rank]);
for(i = 0; i < sendcount[world_rank]; i++){
    matrix1[i] = (int*)malloc(sizeof(int) * y);
}
printf("%d\n", matrix1[0][0]);
// allocating 2d array which is my matrix 2...
matrix2 = (int**)malloc(sizeof(int*) * y);
for(i = 0; i < y; i++){
    matrix2[i] = (int*)malloc(sizeof(int) * z);
}
printf("%d\n", matrix2[0][0]);
// my result matrix which consists of my sendcount[world_rank] and z :)
resultmatrix = (int**)malloc(sizeof(int*) * x);
for(i = 0; i < x; i++){
    resultmatrix[i] = (int*)malloc(sizeof(int) * z);
}
printf("%d\n", resultmatrix[0][0]);

count = 0;
for(i = 0; i < sendcount[world_rank]; i++){
    for(j = 0; j < y; j++){
        matrix1[i][j] = recM1[count];
        count++;
        //printf("%d ", matrix1[i][j]);
    }
    //printf("\n");
    //printf("\n");
}
//printf("\n");
free(recM1);
count = 0;
for(i = 0; i < y; i++){
    for(j = 0; j < z; j++){
        matrix2[i][j] = tempM2[count];
        count++;
        //printf("%d ", matrix2[i][j]);
    }
    //printf("\n");
}
//printf("\n");
free(tempM2);
free(tempM1);
//printf("%d\n", resultmatrix[0][0]);
count = 0;
while(count < z){

    for (i = 0; i < sendcount[world_rank]; i++){

        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);

        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);

    }
    count++;
    //printf("\n");
}
/*for(i = 0; i < sendcount[world_rank]; i++){
    for(j = 0; j < z; j++){
        printf("%d ", resultmatrix[i][j]);
    }
    printf("\n");
}   */
    //int khara = world_rank * scattered_rows;
    //while(i < scattered_rows){
    /*if(world_rank!=0){
        for (i = 0; i < sendcount[world_rank]; i++){
            MPI_Send(resultmatrix[i], z, MPI_INT, root, tag, MPI_COMM_WORLD);
        }
    }*/
        //MPI_Gather(resultmatrix, z*scattered_rows, MPI_INT, global_result, x * z, MPI_INT, root, MPI_COMM_WORLD);
        //i++;
        //khara++;
    //}

/*if(world_rank == root){
    int rank = 1;
    for(j = 0; j < sendcount[world_rank]; j++){
            global_result[j] = resultmatrix[j];
        }
    count = sendcount[world_rank];
    while(rank < world_size){
        for(i = 0; i < sendcount[rank]; i++){
            MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);

        }
        rank++;
    }*/

    /*while (rank < world_size){
    for(i = scattered_rows i = 0; i < sendcount; i += scattered_rows){
        rank++;
        for(j = 0; j < scattered_rows; j++){
            printf("hello!!!\n");
            MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);
        }
    }*/


    /*for(i = 0; i < x; i++){
        printf("\n");
        for(j = 0; j < z; j++){
            printf("%d ", global_result[i][j]);
        }
    }*/
    //printf("\nx = %d, y = %d, z= %d", x,y,z);

//}
free(matrix1);
free(matrix2);
MPI_Finalize();
return 0;
}
teh_ouj
  • 65
  • 6
  • 2
    When you allocate memory using `malloc`, it's not initialized in any way, the contents is *indeterminate*. – Some programmer dude Apr 19 '15 at 12:42
  • ah yes joachim but that actually wasnt my problem, i edited my question... because for some reason, only my root Process will print actual multiplied values in my result matrix... – teh_ouj Apr 19 '15 at 12:49
  • are you assigning values to all processes or just the main process? are you syncing the processes in any way? – Shlomi Agiv Apr 19 '15 at 12:51
  • When editing the question to mean something else from what it initially meant, you should probably update the title and text as well. The code doesn't have anything to do with allocation anymore, it's a simple case of stepping through the code line by line in a debugger to see what happens. And you *have* initialized the arrays properly? For example, since you use the `+=` operator, that means you need to initialize `resultmatrix[i][count]` before the loops or the results will indeed be wrong as you will have undefined behavior by reading indeterminate data. – Some programmer dude Apr 19 '15 at 12:53
  • yes, all my Processes have the slices of matrix1, and they all have matrix2... and they all have allocated dynamically result matrix with its own appropriate sliced size... – teh_ouj Apr 19 '15 at 12:54
  • The error in your code is likely in the MPI sections of your code, which you have not shown here. Without that, we cannot help you. Can you provide a [MCVE](http://stackoverflow.com/help/mcve)? – wolfPack88 Apr 19 '15 at 12:57
  • The code edited out is not needed, but if you print the result matrix array, only at process root will it print the result and the rest is jibrish... – teh_ouj Apr 19 '15 at 13:04
  • do you run this code on all processes or only on the main process? because what you have shown will work only on the process it's run on unless you sync it in some way – Shlomi Agiv Apr 19 '15 at 13:28

1 Answers1

0

You allocate memory for resultmatrix[i], but you do not initialize it. That means its contents will be indeterminate and when you use the data in the statement

resultmatrix[i][count] += ...;

you will read and modify that indeterminate data, and you will have undefined behavior.

In the allocation loop you can use e.g. memset to initialize the allocated memory:

for(i = 0; i < x; i++){
    resultmatrix[i] = malloc(sizeof(int) * z);
    memset(resultmatrix[i], 0, sizeof(int) * z);
}

[Note that I removed the cast of the result of malloc, in C you should not do that]

Community
  • 1
  • 1
Some programmer dude
  • 400,186
  • 35
  • 402
  • 621