I was trying to calculate the speedup and optimal number of threads for matrix addition but the parallel execution always takes more time than sequential and increases keeps on increasing till about 8 threads and then becomes kind of constant. Can anyone help me figure out why?
The sequential code:
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
int main (int argc, char *argv[])
{
int ARRAY_SIZE;
int n = 10000;
int n_per_thread;
int i,j;
int *a[n];
int *b[n];
int *c[n];
for (i=0; i<n; i++){
a[i] = (int *)malloc(n * sizeof(int));
b[i] = (int *)malloc(n * sizeof(int));
c[i] = (int *)malloc(n * sizeof(int));
}
for(i=0; i<n; i++) {
for(j=0;j<n;j++){
a[i][j] = 1;
}
}
for(i=0; i<n; i++) {
for(j=0;j<n;j++){
b[i][j] = 1;
}
}
clock_t t;
t = clock();
for(i=0; i<n; i++) {
for(j=0;j<n;j++){
c[i][j] = a[i][j]+b[i][j];
}
}
t = clock() - t;
double time_taken = ((double)t)/CLOCKS_PER_SEC;
printf("Time taken by sequential for matrix size %d: ",n);
printf("%f%s\n",time_taken," seconds");
return 0;
}
The parallel code:
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <time.h>
#define NUM_THREADS 10
int main (int argc, char *argv[])
{
int ARRAY_SIZE;
int n = 10000;
int n_per_thread;
int total_threads = NUM_THREADS;
int i,j;
int *a[n];
int *b[n];
int *c[n];
for (i=0; i<n; i++){
a[i] = (int *)malloc(n * sizeof(int));
b[i] = (int *)malloc(n * sizeof(int));
c[i] = (int *)malloc(n * sizeof(int));
}
for(i=0; i<n; i++) {
for(j=0;j<n;j++){
a[i][j] = 1;
}
}
for(i=0; i<n; i++) {
for(j=0;j<n;j++){
b[i][j] = 1;
}
}
omp_set_num_threads(total_threads);
n_per_thread = n/total_threads;
clock_t t;
t = clock();
#pragma omp parallel for shared(a, b, c) private(i) schedule(static, n_per_thread)
for(i=0; i<n; i++) {
for(j=0;j<n;j++){
c[i][j] = a[i][j]+b[i][j];
}
}
t = clock() - t;
double time_taken = ((double)t)/CLOCKS_PER_SEC;
printf("Time taken by parallel for vector size %d: ",n);
printf("%f%s\n",time_taken," seconds");
return 0;
}