I was experimenting with multithreading by solving quadratic equations, using a sequential section for comparison and several parallel implementations based on the OpenMP API.
For the first parallel version I simply distributed the execution load using the thread ID:
// Parallel version 1: every thread handles a cyclic (round-robin) share of
// the N equations, selected by its own thread id.
delta = 0;
// Measure elapsed wall-clock time. clock() reports processor time, which may
// be accumulated over all threads of the process and then hides the speedup.
const double wall_start_v1 = omp_get_wtime();
#pragma omp parallel num_threads(P) shared(x1, x2, b, a, c) private(delta)
{
    const int threadID = omp_get_thread_num();
    // num_threads(P) is only a request: stride by the size of the team that
    // was actually created, otherwise a smaller team would skip iterations.
    const int teamSize = omp_get_num_threads();
    for (int i = threadID; i < N; i += teamSize)
    {
        delta = b[i] * b[i] - 4 * a[i] * c[i];
        // Equations with a negative discriminant leave x1[i] / x2[i] untouched.
        if (delta >= 0)
        {
            const auto root = sqrt(delta); // computed once, used for both roots
            x1[i] = (-b[i] + root) / (2 * a[i]);
            x2[i] = (-b[i] - root) / (2 * a[i]);
        }
    }
}
durata_par = omp_get_wtime() - wall_start_v1;
printf("P_V1 %2.10f seconds\n", durata_par);
// FA: ratio of the sequential duration to the parallel duration.
printf("P_V1 FA=%2.2f\n", durata_secv / durata_par);
// E(P): FA divided by the requested thread count P.
printf("P_V1 E(%d)=%2.2f\n", P, (durata_secv / durata_par) / P);
Then I tried to distribute the loop iterations using `#pragma omp for`:
delta = 0;
start = clock();
#pragma omp parallel num_threads(P) shared(x1, x2, b, a, c) private(delta)
{
int threadID = omp_get_thread_num();
int numberofThreads = omp_get_num_threads();
if (threadID == 0)
{
std::cout << "Number of threads: " << numberofThreads << std::endl;
}
#pragma omp for
for (int i = 0; i < N; i++)
{
delta = b[i] * b[i] - 4 * a[i] * c[i];
if (delta >= 0)
{
x1[i] = (-b[i] + sqrt(delta)) / (2 * a[i]);
x2[i] = (-b[i] - sqrt(delta)) / (2 * a[i]);
}
}
}
stop = clock();
durata_par = (double)(stop - start) / CLOCKS_PER_SEC;
printf("P_V2 %2.10f seconds\n", durata_par);
printf("P_V2 FA=%2.2f\n", durata_secv / durata_par);
printf("P_V2 E(%d)=%2.2f\n", P, (durata_secv / durata_par) / P);
So far so good, but I noticed that if I remove the assignment to delta (the `delta = 0;` at the top of the second version), the execution time increases and, as a result, the amplification factor (FA) drops from 4.4 to 1.4.
Is there an explanation for this behavior? (I would expect no difference, because delta is declared private and will be re-created in each thread anyway.)