I'm trying to learn OpenMP to parallelize a part of my code and I'm trying to figure out why it's not faster when using 2 threads instead of 1. Here's a minimal working example of the code:
#include <iostream>
#include <omp.h>
using namespace std;
class My_class
{
public :
// Constructor
My_class(int nuIterations)
: prVar_(0),
nuIters_(nuIterations)
{} // Empty
// Do something expensive involving the class' private vars
void do_calculations()
{
for (int i=0;i<nuIters_;++i){
prVar_=prVar_+i+2*i+3*i+4*i-5*i-4*i;
}
}
// Retrieve result
double getResult()
{
return prVar_;
}
private:
double prVar_;
int nuIters_;
};
int main()
{
// Initialize one object for every thread
My_class *test_object1, *test_object2;
test_object1 = new My_class(1000000000);
test_object2 = new My_class(500000000);
// Set number of threads (use one line at a time)
omp_set_num_threads(1); // One thread executes in 11.5 real seconds
//omp_set_num_threads(2); // Two threads execute in 13.2 real seconds
double start = omp_get_wtime(); // Start timer
#pragma omp parallel sections // Do calculations in parallel
{
#pragma omp section
{
test_object1->do_calculations();
}
#pragma omp section
{
test_object2->do_calculations();
}
}// End of parallel sections
// Print results
double end = omp_get_wtime();
cout<<"Res 1 : "<<test_object1->getResult()<<endl;
cout<<"Res 2 : "<<test_object2->getResult()<<endl;
cout<<"Time : "<<end-start<<endl;
return 0;
}
Compiling and running this using g++ myomp.cpp -O0 -std=c++11 -fopenmp
gives the following execution time for 1 and 2 threads:
- 1 thread : 11.5 seconds
- 2 threads: 13.2 seconds
Is there some way I can speed this up for 2 threads? I am running this on a 4-core Intel i7-4600U and Ubuntu.
EDIT: Changed most of the post such that it follows the guidlines.