1

The code I wrote in C for matrix multiplication with MPI reports a global time of approximately 5 seconds, but when I run the same thing in Python with mpi4py it takes very little time, only a few milliseconds. What is the problem with MPI in C? It doesn't feel like 5 seconds when I run it in the Linux shell: the output appears really fast, but the global time is still shown as 5 seconds. The code below is in C.

#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"


/* Prints the label `prompt` followed by the N x N matrix `a`, one row per line. */
void print_results(char *prompt, int a[N][N]);

/*
 * Multiplies two N x N integer matrices with MPI, one row of A per rank.
 *
 * Rank 0 reads how many rows/columns to fill (each must be in 1..N; the
 * remaining entries stay zero), fills A and B with random digits, scatters
 * the rows of A, broadcasts B, and gathers the product rows into C.
 *
 * The job must be started with exactly N ranks: every rank receives one
 * row of A in `aa` and produces one row of C in `cc`, and both buffers
 * hold exactly N ints.
 *
 * Returns 0 on success and EXIT_FAILURE when the rank count is not N.
 * Invalid input on rank 0 terminates the whole job through MPI_Abort.
 */
int main(int argc, char *argv[])
{
    int i, j, rank, size;
    int a[N][N] = {{0}};
    int b[N][N] = {{0}};
    int c[N][N] = {{0}};
    int aa[N], cc[N];
    int row, col;
    double time1, time2, duration, global;

    MPI_Init(&argc, &argv);
    /*
     * NOTE: the timer starts before rank 0 blocks in scanf(), so the
     * durations reported below include the time rank 0 spends waiting for
     * the user to type the dimensions (the other ranks wait for it in the
     * collectives).
     */
    time1 = MPI_Wtime();
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* One row per rank: any other rank count would overrun aa/cc or
     * leave rows of C uncomputed. */
    if (size != N) {
        if (rank == 0) {
            fprintf(stderr,
                    "this program must be run with exactly %d processes (got %d)\n",
                    N, size);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0) {
        printf("enter the number of row =");
        fflush(stdout);  /* make the prompt visible before blocking on input */
        if (scanf("%d", &row) != 1 || row < 1 || row > N) {
            fprintf(stderr, "number of rows must be an integer between 1 and %d\n", N);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        printf("enter the number of column =");
        fflush(stdout);
        if (scanf("%d", &col) != 1 || col < 1 || col > N) {
            fprintf(stderr, "number of columns must be an integer between 1 and %d\n", N);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        /* Seed exactly once: reseeding with the same time() value before
         * filling B would make B a copy of A. */
        srand((unsigned)time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }

    /* Each rank receives one row of A (N ints); every rank needs all of B. */
    MPI_Scatter(a, N, MPI_INT, aa, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    /* cc = aa * B: entry i is the dot product of this rank's row of A
     * with column i of B. */
    for (i = 0; i < N; i++) {
        int sum = 0;

        for (j = 0; j < N; j++) {
            sum += aa[j] * b[j][i];
        }
        cc[i] = sum;
    }

    MPI_Gather(cc, N, MPI_INT, c, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    time2 = MPI_Wtime();
    duration = time2 - time1;
    /* "Global runtime" is the SUM of the per-rank durations, i.e. roughly
     * size times the wall-clock time, not the elapsed time itself. */
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);
    MPI_Finalize();
    if (rank == 0) {
        print_results("C = ", c);
    }
    return 0;
}

/*
 * Writes a labelled N x N integer matrix to stdout.
 *
 * prompt: text printed on its own line before the matrix.
 * a:      matrix to print; each row goes on one line and every value is
 *         preceded by a single space.
 *
 * Two blank lines are emitted before the label and after the last row.
 */
void print_results(char *prompt, int a[N][N])
{
    int r = 0;

    printf ("\n\n%s\n", prompt);
    while (r < N) {
        const int *cell = a[r];
        const int *row_end = cell + N;

        for (; cell != row_end; ++cell) {
            printf(" %d", *cell);
        }
        printf ("\n");
        ++r;
    }
    printf ("\n\n");
}

The output it gives is

4
4
enter the number of row =enter the number of column =Global runtime is 5.975327
Runtime at 0 is 1.493793 
Runtime at 1 is 1.493793 
Runtime at 2 is 1.493877 
Runtime at 3 is 1.493865 


C = 
 78 83 142 116
 128 138 236 194
 39 49 112 71
 96 109 204 156

Please let me know if there is some problem with the code!!

h3avyc0der
  • 29
  • 5

1 Answers1

0

As discussed in the comments, I have moved the time1 = MPI_Wtime(); call and added a Barrier before it.

Take a look at the modified code :

#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"


/* Prints the label `prompt` followed by the N x N matrix `a`, one row per line. */
void print_results(char *prompt, int a[N][N]);

/*
 * Multiplies two N x N integer matrices with MPI, one row of A per rank,
 * and times only the communication and computation phases.
 *
 * Rank 0 reads how many rows/columns to fill (each must be in 1..N; the
 * remaining entries stay zero), fills A and B with random digits, scatters
 * the rows of A, broadcasts B, and gathers the product rows into C.
 *
 * The job must be started with exactly N ranks: every rank receives one
 * row of A in `aa` and produces one row of C in `cc`, and both buffers
 * hold exactly N ints.
 *
 * Returns 0 on success and EXIT_FAILURE when the rank count is not N.
 * Invalid input on rank 0 terminates the whole job through MPI_Abort.
 */
int main(int argc, char *argv[])
{
    int i, j, rank, size;
    int a[N][N] = {{0}};
    int b[N][N] = {{0}};
    int c[N][N] = {{0}};
    int aa[N], cc[N];
    int row, col;
    double time1, time2, duration, global;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* One row per rank: any other rank count would overrun aa/cc or
     * leave rows of C uncomputed. */
    if (size != N) {
        if (rank == 0) {
            fprintf(stderr,
                    "this program must be run with exactly %d processes (got %d)\n",
                    N, size);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0) {
        printf("enter the number of row =");
        fflush(stdout);  /* make the prompt visible before blocking on input */
        if (scanf("%d", &row) != 1 || row < 1 || row > N) {
            fprintf(stderr, "number of rows must be an integer between 1 and %d\n", N);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        printf("enter the number of column =");
        fflush(stdout);
        if (scanf("%d", &col) != 1 || col < 1 || col > N) {
            fprintf(stderr, "number of columns must be an integer between 1 and %d\n", N);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        /* Seed exactly once: reseeding with the same time() value before
         * filling B would make B a copy of A. */
        srand((unsigned)time(NULL));
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }
        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }

    /* Wait until rank 0 has finished reading input, then start the clock,
     * so user think-time is excluded and all ranks start together. */
    MPI_Barrier(MPI_COMM_WORLD);
    time1 = MPI_Wtime();

    /* Each rank receives one row of A (N ints); every rank needs all of B. */
    MPI_Scatter(a, N, MPI_INT, aa, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    /* cc = aa * B: entry i is the dot product of this rank's row of A
     * with column i of B. */
    for (i = 0; i < N; i++) {
        int sum = 0;

        for (j = 0; j < N; j++) {
            sum += aa[j] * b[j][i];
        }
        cc[i] = sum;
    }

    MPI_Gather(cc, N, MPI_INT, c, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    time2 = MPI_Wtime();
    duration = time2 - time1;
    /* "Global runtime" is the SUM of the per-rank durations, i.e. roughly
     * size times the wall-clock time, not the elapsed time itself. */
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);
    MPI_Finalize();
    if (rank == 0) {
        print_results("C = ", c);
    }
    return 0;
}

/*
 * Writes a labelled N x N integer matrix to stdout.
 *
 * prompt: text printed on its own line before the matrix.
 * a:      matrix to print; each row goes on one line and every value is
 *         preceded by a single space.
 *
 * Two blank lines are emitted before the label and after the last row.
 */
void print_results(char *prompt, int a[N][N])
{
    int r = 0;

    printf ("\n\n%s\n", prompt);
    while (r < N) {
        const int *cell = a[r];
        const int *row_end = cell + N;

        for (; cell != row_end; ++cell) {
            printf(" %d", *cell);
        }
        printf ("\n");
        ++r;
    }
    printf ("\n\n");
}

By doing so, you will not take the user input time into account, and placing the Barrier before the first timing call ensures that all processes start timing at nearly the same moment.

Also beware that your code only works with a 4x4 matrix!

Apart from that, you should get something like:

mpirun -n 4 a.out
enter the number of row =4
enter the number of column =4
Global runtime is 0.005867
Runtime at 0 is 0.001474
Runtime at 1 is 0.001464
Runtime at 2 is 0.001464
Runtime at 3 is 0.001466

PilouPili
  • 2,601
  • 2
  • 17
  • 31
  • Yeah, I noticed that it only works for a 4x4 matrix for some reason. It should work for all sizes; I don't know what the problem is – h3avyc0der Jun 29 '22 at 07:21
  • I think you should take a look at https://mpitutorial.com/tutorials/mpi-scatter-gather-and-allgather/ . It is mainly a mixup in size. I am sure you will find the problem on your own. Don't hesitate to upvote and accept answer. – PilouPili Jun 29 '22 at 07:23
  • If I do 10x10 it gives garbage values, with negative answers in it too. I changed the value of the defined N to 10 and ran it – h3avyc0der Jun 29 '22 at 07:24
  • #define N 4 -> implies fixed-size arrays, but you are asking for a dynamic size. Allocate a, b, c, aa and cc dynamically – PilouPili Jun 29 '22 at 07:27
  • oh okay, will try to do that – h3avyc0der Jun 29 '22 at 07:31