I am trying hard to get my code to run using MPI. The goal is matrix multiplication. The structure of my code is:
There are two matrices, A and B.
1. Scatter the rows of A across the processes.
2. Broadcast matrix B to every process.
3. Each process computes its share of the rows of C.
4. Gather the partial results back on the master (a collective sketch of this plan follows below).
I have written the code below, but it is not running correctly: I get a segmentation fault.
I have no idea why this is happening. I have tried tweaking the code a lot, but something always seems to be wrong.
Could someone go over this code and tell me why it is not working?
I have added comments such as "Scattering matrices" and "Gathering answers", so even if you only go through the scatter part of the program and tell me why it is not right, I'll be thankful!
#define N 512
#include <stdio.h>
#include <math.h>
#include <sys/time.h>
#include "mpi.h"
void print_results(char *prompt, float result[N][N]);
int main(int argc, char *argv[])
{
int i, j, k;
MPI_Status status;
int process_rank;        // rank of this process
int no_of_processes;     // total number of processes
int Master_To_Slave = 0; // base message tag, master -> workers
int Slave_To_Master = 5; // base message tag, workers -> master
float a[N][N], b[N][N], c[N][N];
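/* Note: a, b, and c are float (3 x 1 MiB on the stack for N = 512),
   so every MPI call that touches them must use MPI_FLOAT; a larger N
   may also need heap allocation to stay under the default stack limit. */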
char *usage = "Usage: %s file\n";
FILE *fd;
double elapsed_time;
struct timeval tv1, tv2;
if (argc < 2) {
fprintf (stderr, usage, argv[0]);
return -1;
}
if ((fd = fopen (argv[1], "r")) == NULL) {
fprintf (stderr, "%s: Cannot open file %s for reading.\n",
argv[0], argv[1]);
fprintf (stderr, usage, argv[0]);
return -1;
}
// Read input from file for matrices a and b.
// The I/O is not timed because this I/O needs
// to be done regardless of whether this program
// is run sequentially on one processor or in
// parallel on many processors. Therefore, it is
// irrelevant when considering speedup.
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
fscanf (fd, "%f", &a[i][j]);
}
}
for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
fscanf (fd, "%f", &b[i][j]);
int num_of_rows_A = N;
int num_of_cols_A = N;
int num_of_rows_B = N;
int num_of_cols_B = N;
int lower_index_of_A;
int upper_index_of_A;
MPI_Init(&argc, &argv);      // initialize MPI operations
MPI_Barrier(MPI_COMM_WORLD); // synchronize all processes before the time stamp
// Take a time stamp
gettimeofday(&tv1, NULL);
// Scattering matrices: hand out row ranges of a to the workers.
MPI_Comm_rank(MPI_COMM_WORLD, &process_rank);    // get the rank of this process
MPI_Comm_size(MPI_COMM_WORLD, &no_of_processes); // get the number of processes
if (no_of_processes < 2)
{
    fprintf(stderr, "This program needs at least 2 processes.\n");
    MPI_Abort(MPI_COMM_WORLD, 1); // avoid dividing by (no_of_processes - 1) == 0 below
}
if (process_rank == 0)
{
    fprintf(stderr, "Main process started\n");
    fprintf(stderr, "No. of processes: %d\n\n", no_of_processes);
for( i=1; i<no_of_processes;i++)
{
int rows_per_process = num_of_rows_A/(no_of_processes-1);
lower_index_of_A = (i-1)*rows_per_process;
// fprintf (stderr, "Current lower Index of A %s",lower_index_of_A);
if(i+1==no_of_processes &&((num_of_rows_A%(no_of_processes-1))!=0))
{
upper_index_of_A=num_of_rows_A;
// fprintf (stderr, "Current upper Index of A %s",upper_index_of_A);
}
else
{
upper_index_of_A=lower_index_of_A+rows_per_process;
// fprintf (stderr, "Current upper Index of A %s",upper_index_of_A);
}
fprintf (stderr, "Lower index of A %d", lower_index_of_A);
fprintf (stderr, "Upper index of A %d", upper_index_of_A);
fprintf (stderr, "\n\n");
MPI_Send(&lower_index_of_A,1,MPI_INT,i,Master_To_Slave,MPI_COMM_WORLD); //send lower index
MPI_Send(&upper_index_of_A,1,MPI_INT,i,Master_To_Slave+1,MPI_COMM_WORLD);//send upper index
MPI_Send(&a[lower_index_of_A][0],(upper_index_of_A-lower_index_of_A)*num_of_cols_A,MPI_DOUBLE,i,Master_To_Slave+2,MPI_COMM_WORLD);//send rows of A
fprintf (stderr, "Scatter done");
}
MPI_Bcast(&b, num_of_rows_A*num_of_cols_B, MPI_DOUBLE, 0, MPI_COMM_WORLD);
fprintf (stderr, "Broadcast done");
}
else
{
MPI_Recv(&lower_index_of_A, 1, MPI_INT, 0, Master_To_Slave,MPI_COMM_WORLD, &status);
MPI_Recv(&upper_index_of_A, 1, MPI_INT, 0, Master_To_Slave+1,MPI_COMM_WORLD,&status);
MPI_Recv(&a[lower_index_of_A], (upper_index_of_A-lower_index_of_A)*num_of_cols_A, MPI_DOUBLE,0, Master_To_Slave+2,MPI_COMM_WORLD, &status);
//TODO: Add code to implement matrix multiplication (C=AxB) in parallel.
for( i=lower_index_of_A;i<upper_index_of_A;i++)
{
for( j=0;j<num_of_cols_B;j++)
{
for( k=0;k<num_of_rows_B;k++)
{
c[i][j]+=(a[i][k]*b[k][j]);
}
}
}
MPI_Send(&lower_index_of_A, 1, MPI_INT, 0, Slave_To_Master,MPI_COMM_WORLD);
MPI_Send(&upper_index_of_A, 1, MPI_INT, 0, Slave_To_Master+1,MPI_COMM_WORLD);
MPI_Send(&c[lower_index_of_A], (upper_index_of_A-lower_index_of_A)*num_of_cols_B, MPI_DOUBLE,0, Slave_To_Master+2,MPI_COMM_WORLD);
}
// Gathering answers: collect each worker's rows of c on the master.
if (process_rank == 0)
{
    for (i = 1; i < no_of_processes; i++)
    {
        // receive lower bound from a worker
        MPI_Recv(&lower_index_of_A, 1, MPI_INT, i, Slave_To_Master, MPI_COMM_WORLD, &status);
        // receive upper bound from a worker
        MPI_Recv(&upper_index_of_A, 1, MPI_INT, i, Slave_To_Master + 1, MPI_COMM_WORLD, &status);
        // receive the processed rows of c; the buffer is float, so use MPI_FLOAT
        MPI_Recv(&c[lower_index_of_A][0], (upper_index_of_A - lower_index_of_A) * num_of_cols_B,
                 MPI_FLOAT, i, Slave_To_Master + 2, MPI_COMM_WORLD, &status);
}
}
// Take a time stamp. This won't happen until after the master
// process has gathered all the results from the workers.
gettimeofday(&tv2, NULL);
elapsed_time = (tv2.tv_sec - tv1.tv_sec) +
               ((tv2.tv_usec - tv1.tv_usec) / 1000000.0);
if (process_rank == 0)
{
    printf ("elapsed_time=\t%lf (seconds)\n", elapsed_time);
    // print results; only the master holds the complete matrix c
    print_results("C = ", c);
}
MPI_Finalize();
return 0;
}
void print_results(char *prompt, float result[N][N])
{
int i, j;
printf ("\n\n%s\n", prompt);
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf(" %.2f", result[i][j]);
}
printf ("\n");
}
printf ("\n\n");
}
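And in case it is relevant to an answer: I understand that if I switched the gather step to collectives, the uneven row split (the last process taking the leftover rows) would need MPI_Gatherv rather than MPI_Gather. This is only a sketch of what I mean, mirroring the row layout above; gather_rows is a hypothetical helper, not part of my program:

#include "mpi.h"

#define N 512

static float c[N][N]; /* stands in for the c from my program */

/* Hypothetical helper: gather uneven row blocks of c onto rank 0.
   Rank r >= 1 owns rows (r-1)*rows_per_worker up to the next block,
   with the last rank also taking the remainder; rank 0 owns none. */
void gather_rows(int rank, int size, int my_lower, int my_upper)
{
    int recvcounts[size], displs[size];
    int rows_per_worker = N / (size - 1);
    for (int r = 0; r < size; r++) {
        int lo = (r == 0) ? 0 : (r - 1) * rows_per_worker;
        int hi = (r == 0) ? 0 : ((r == size - 1) ? N : lo + rows_per_worker);
        recvcounts[r] = (hi - lo) * N; /* element counts, not row counts */
        displs[r]     = lo * N;        /* offset of the block inside c */
    }
    if (rank == 0)
        MPI_Gatherv(MPI_IN_PLACE, 0, MPI_FLOAT,
                    &c[0][0], recvcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
    else
        MPI_Gatherv(&c[my_lower][0], (my_upper - my_lower) * N, MPI_FLOAT,
                    NULL, NULL, NULL, MPI_FLOAT, 0, MPI_COMM_WORLD);
}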