I have a task to compare the sequential and parallel versions of the code.
#sequential
for (int k = 0; k < ROWS; k++) {
for (int i = 0; i < COLUMNS; i++) {
for (int j = 0; j < COLUMNS; j++) {
c[i][j] += a[i][k] * b[k][j];
}}}
#parallel
/*
 * Parallel blocked (tiled) version: c += a * b, processed in r x r tiles.
 *
 * The outermost tile loop (over i) is divided among the OpenMP threads.
 * Every thread therefore updates a disjoint range of the first index of c,
 * so no synchronisation on c is required.
 *
 * Tile ends are clamped to ROWS / COLUMNS so that sizes which are not a
 * multiple of r do not index past the end of the matrices.
 *
 * NOTE(review): the innermost loop runs over kk, so b[kk][jj] changes its
 * first index on every iteration, whereas the sequential k-i-j version walks
 * the second index of b and c in its innermost loop. That difference in
 * access pattern is a likely contributor to the measured slowdown -- confirm
 * by profiling.
 */
omp_set_num_threads(8);
int r = 128; /* tile edge length */
#pragma omp parallel for
for (int i = 0; i < ROWS; i += r) {
    const int i_end = (i + r < ROWS) ? i + r : ROWS;
    for (int j = 0; j < COLUMNS; j += r) {
        const int j_end = (j + r < COLUMNS) ? j + r : COLUMNS;
        for (int k = 0; k < COLUMNS; k += r) {
            const int k_end = (k + r < COLUMNS) ? k + r : COLUMNS;
            for (int ii = i; ii < i_end; ii++) {
                for (int jj = j; jj < j_end; jj++) {
                    for (int kk = k; kk < k_end; kk++) {
                        c[ii][jj] += a[ii][kk] * b[kk][jj];
                    }
                }
            }
        }
    }
}
I have to use this loop nesting! The problem I have is that the parallel version of the code is actually slower than the sequential one. Am I making a mistake somewhere, or is it like this because of the loop nesting? For example, for a matrix size of 1024: seq = 0.61 [s], par = 1.16 [s].
Edit:
/*
 * Parallel k-i-j version: c += a * b.
 *
 * The k loop must not be divided among threads: for a fixed (i, j), every k
 * iteration adds into the same c[i][j], so splitting k makes several threads
 * update the same element concurrently (a data race). A reduction clause on
 * a scalar does not help, because the accumulation target is the array c.
 *
 * Instead, one parallel region encloses the k loop (every thread runs all k
 * iterations) and the i loop is work-shared, so each thread writes a disjoint
 * range of the first index of c. The implicit barrier at the end of the
 * work-shared loop keeps the threads in step from one k to the next.
 */
#pragma omp parallel
for (int k = 0; k < ROWS; k++) {
    #pragma omp for
    for (int i = 0; i < COLUMNS; i++) {
        for (int j = 0; j < COLUMNS; j++) {
            c[i][j] += a[i][k] * b[k][j];
        }
    }
}