I tested parallel_for_
in OpenCV by comparing with the normal operation for just simple array summation and multiplication.
I have array of 100 integers and split into 10 each and run using parallel_for_
.
Then I also have normal 0 to 99 operation for summation and multiuplication.
Then I measured the elapsed time and normal operation is faster than parallel_for_
operation.
My CPU is Intel(R) Core(TM) i7-2600 Quard Core CPU.
parallel_for_
operation took 0.002sec (took 2 clock cycles) for summation and 0.003sec (took 3 clock cycles) for multiplication.
But normal operation took 0.0000sec (less than one click cycle) for both summation and multiplication. What am I missing? My code is as follow.
TEST Class
#include <opencv2\core\internal.hpp>
#include <opencv2\core\core.hpp>
#include <tbb\tbb.h>
using namespace tbb;
using namespace cv;
template <class type>
class Parallel_clipBufferValues:public cv::ParallelLoopBody
{
private:
type *buffertoClip;
type maxSegment;
char typeOperation;//m = mul, s = summation
static double total;
public:
Parallel_clipBufferValues(){ParallelLoopBody::ParallelLoopBody();};
Parallel_clipBufferValues(type *buffertoprocess, const type max, const char op): buffertoClip(buffertoprocess), maxSegment(max), typeOperation(op){
if(typeOperation == 's')
total = 0;
else if(typeOperation == 'm')
total = 1;
}
~Parallel_clipBufferValues(){ParallelLoopBody::~ParallelLoopBody();};
virtual void operator()(const cv::Range &r) const{
double tot = 0;
type *inputOutputBufferPTR = buffertoClip+(r.start*maxSegment);
for(int i = 0; i < 10; ++i)
{
if(typeOperation == 's')
total += *(inputOutputBufferPTR+i);
else if(typeOperation == 'm')
total *= *(inputOutputBufferPTR+i);
}
}
static double getTotal(){return total;}
void normalOperation(){
//int iteration = sizeof(buffertoClip)/sizeof(type);
if(typeOperation == 'm')
{
for(int i = 0; i < 100; ++i)
{
total *= buffertoClip[i];
}
}
else if(typeOperation == 's')
{
for(int i = 0; i < 100; ++i)
{
total += buffertoClip[i];
}
}
}
};
MAIN
#include "stdafx.h"
#include "TestClass.h"
#include <ctime>
double Parallel_clipBufferValues<int>::total;
int _tmain(int argc, _TCHAR* argv[])
{
const int SIZE=100;
int myTab[SIZE];
double totalSum_by_parallel;
double totalSun_by_normaloperation;
double elapsed_secs_parallel;
double elapsed_secs_normal;
for(int i = 1; i <= SIZE; i++)
{
myTab[i-1] = i;
}
int maxSeg =10;
clock_t begin_parallel = clock();
cv::parallel_for_(cv::Range(0,maxSeg), Parallel_clipBufferValues<int>(myTab, maxSeg, 'm'));
totalSum_by_parallel = Parallel_clipBufferValues<int>::getTotal();
clock_t end_parallel = clock();
elapsed_secs_parallel = double(end_parallel - begin_parallel) / CLOCKS_PER_SEC;
clock_t begin_normal = clock();
Parallel_clipBufferValues<int> norm_op(myTab, maxSeg, 'm');
norm_op.normalOperation();
totalSun_by_normaloperation = norm_op.getTotal();
clock_t end_normal = clock();
elapsed_secs_normal = double(end_normal - begin_normal) / CLOCKS_PER_SEC;
return 0;
}