I'm attempting to implement 2D convolution in C. I have two implementations: the first is sequential and the second is a parallel implementation using OpenMP. The problem is that the runtime of the parallel code is always greater than the runtime of the sequential code. The original algorithm was implemented in C++ using std::vector and it works fine, but I'm trying to implement it in C using pointers and dynamic memory allocation.
Here are the steps taken in the algorithm:
1- read input_matrix and kernel from separate files.
2- compute convolution
3- write the result to a file.
I use dynamic memory allocation for the input_matrix, kernel, and output_matrix. Border columns and border rows are assumed to be neighbors (i.e. the matrix wraps around at its edges), and kernels are assumed to have an odd number of rows and columns.
Here's the sequential convolution:
/*
 * Sequential 2D convolution with wrap-around borders: the row above row 0 is
 * the last row, and the column left of column 0 is the last column.
 *
 * Reads:  input_matrix (rows x columns), kernel_matrix (krows x kcolumns),
 *         both stored row-major in flat arrays.
 * Writes: output_matrix (rows x columns), row-major.
 */

/* The accumulation below uses +=, so the output must start at zero. */
for (int z = 0; z < rows * columns; ++z) {
    output_matrix[z] = 0;
}

/*
 * clock() reports processor time; that is a reasonable proxy for elapsed
 * time only while the timed code runs on a single thread.
 */
clock_t begin = clock();
for (int x = 0; x < rows; ++x) {
    for (int y = 0; y < columns; ++y) {
        for (int i = 0; i < krows; ++i) {
            /*
             * The source row depends only on x and i, so resolve it once per
             * kernel row instead of once per kernel element.
             */
            int index1 = x + i - krows / 2;
            /*
             * Wrap with loops rather than a single correction so the index
             * stays in range even when the kernel is taller than the matrix.
             */
            while (index1 < 0) {
                index1 += rows;
            }
            while (index1 >= rows) {
                index1 -= rows;
            }

            for (int j = 0; j < kcolumns; ++j) {
                int index2 = y + j - kcolumns / 2;
                /* Same multi-period wrap for the column index. */
                while (index2 < 0) {
                    index2 += columns;
                }
                while (index2 >= columns) {
                    index2 -= columns;
                }

                output_matrix[x * columns + y] +=
                    input_matrix[index1 * columns + index2] *
                    kernel_matrix[i * kcolumns + j];
            }
        }
    }
}
clock_t end = clock();
printf("Sequential runtime = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
And here is the parallel code for convolution:
int index1, index2, a, b,x,y;
for(int z = 0; z<rows*columns;++z)
*(output_matrix + z) = 0;
clock_t begin = clock();
#pragma omp parallel
# pragma omp for private(x,y,a, b, index1, index2)
for(int z=0;z<rows*columns;++z){
x=z/columns;
y=z%columns;
for(int i=0;i<krows;++i){
for(int j=0;j<kcolumns;++j){
a=x+i-krows/2;
b=y+j-kcolumns/2;
if(a<0)
index1=rows+a;
else if(a>rows-1)
index1=a-rows;
else
index1=a;
if(b<0)
index2=columns+b;
else if(b>columns-1)
index2=b-columns;
else
index2=b;
output_matrix[x*columns+y]+=input_matrix[index1*columns+index2]*kernel_matrix[i*kcolumns+j];
}
}
}
clock_t end = clock();
printf("Parallel runtime using OMP= %f\n", (double)(end - begin) / CLOCKS_PER_SEC);