I was hoping to improve performance by passing a function pointer or a function object to a function called inside a nested loop, in order to avoid branching inside the loop. Below are three programs: one using a function object, one using a function pointer, and one using branching. For every compiler optimization option and every problem size, the function-pointer and function-object versions are the slowest. This is surprising to me: why would the overhead of a function pointer or function object scale with the problem size? Second question: why does the function object perform worse than the function pointer?
Update
At the end I am also adding a lambda-expression version of the same code. Again, brute force wins: with or without optimization, and for different problem sizes, the lambda-expression version takes more than twice as long as the corresponding brute-force code.
Code below. Run each program with ./a.out [SIZE] [function choice], where the function choice is 1 or 2.
Function Object:
#include <iostream>
#include <chrono>
/**
 * Abstract interface for a two-argument interpolation kernel.
 *
 * Concrete interpolators override operator() to combine a `left` and a
 * `right` value into a single double.
 */
class Interpolator
{
public:
    Interpolator() = default;

    // Virtual so that destroying a derived interpolator through an
    // Interpolator* runs the derived destructor instead of being undefined
    // behaviour.
    virtual ~Interpolator() = default;

    /**
     * Evaluates the kernel.
     * @param left  first operand
     * @param right second operand
     * @return the value computed by the concrete interpolator
     */
    virtual double operator()(double left, double right) = 0;
};
class FirstOrder : public Interpolator
{
public:
FirstOrder(){};
virtual double operator()(double left, double right) { return 2.0 * left * left * left + 3.0 * right; }
};
class SecondOrder : public Interpolator
{
public:
SecondOrder(){};
virtual double operator()(double left, double right) { return 2.0 * left * left + 3.0 * right * right; }
};
double kernel(double left, double right, Interpolator *int_func) { return (*int_func)(left, right); }
int main(int argc, char *argv[])
{
double *a;
int SIZE = atoi(argv[1]);
int it = atoi(argv[2]);
//initialize
a = new double[SIZE];
for (int i = 0; i < SIZE; i++)
a[i] = (double)i;
std::cout << "Initialized" << std::endl;
Interpolator *first;
switch (it)
{
case 1:
first = new FirstOrder();
break;
case 2:
first = new SecondOrder();
break;
}
std::cout << "function" << std::endl;
auto start = std::chrono::high_resolution_clock::now();
//loop
double g;
for (int i = 0; i < SIZE; i++)
{
g = 0.0;
for (int j = 0; j < SIZE; j++)
{
g += kernel(a[i], a[j], first);
}
a[i] += g;
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << "Finalized in " << duration.count() << " ms" << std::endl;
return 0;
}
Function Pointer:
#include <iostream>
#include <chrono>
// Evaluates 2*left^3 + 3*right.
double firstOrder(double left, double right)
{
    const double left_term = 2.0 * left * left * left;
    const double right_term = 3.0 * right;
    return left_term + right_term;
}
// Evaluates 2*left^2 + 3*right^2.
double secondOrder(double left, double right)
{
    const double left_term = 2.0 * left * left;
    const double right_term = 3.0 * right * right;
    return left_term + right_term;
}
// Forwards (left, right) to the function f points to and returns whatever
// that function returns. f is called unconditionally.
double kernel(double left, double right, double (*f)(double, double))
{
    const double value = f(left, right);
    return value;
}
/**
 * Benchmark driver for the function-pointer variant.
 *
 * Usage: ./a.out SIZE CHOICE
 *   SIZE    number of array elements; the timed loop makes SIZE * SIZE
 *           kernel calls
 *   CHOICE  1 selects firstOrder, 2 selects secondOrder
 *
 * Prints the elapsed time of the nested loop in microseconds.
 *
 * @return 0 on success, 1 when the command line is invalid.
 */
int main(int argc, char *argv[])
{
    // Both positional arguments are required; reading argv[1] / argv[2]
    // without this check is undefined behaviour when they are missing.
    if (argc < 3)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "a.out")
                  << " SIZE CHOICE(1|2)" << std::endl;
        return 1;
    }
    const int SIZE = atoi(argv[1]);
    const int it = atoi(argv[2]);
    if (SIZE < 0)
    {
        std::cerr << "SIZE must be non-negative" << std::endl;
        return 1;
    }

    // Resolve the choice before any allocation so the error path has
    // nothing to clean up.
    double (*func)(double, double) = nullptr;
    switch (it)
    {
    case 1:
        func = &firstOrder;
        break;
    case 2:
        func = &secondOrder;
        break;
    default:
        // Previously `func` stayed uninitialised here and was called.
        std::cerr << "CHOICE must be 1 or 2" << std::endl;
        return 1;
    }

    double *a = new double[SIZE];
    for (int i = 0; i < SIZE; i++)
        a[i] = static_cast<double>(i); // initialization
    std::cout << "Initialized" << std::endl;
    std::cout << "function" << std::endl;

    const auto start = std::chrono::high_resolution_clock::now();
    //loop
    for (int i = 0; i < SIZE; i++)
    {
        double g = 0.0;
        for (int j = 0; j < SIZE; j++)
        {
            g += kernel(a[i], a[j], func);
        }
        a[i] += g;
    }
    const auto stop = std::chrono::high_resolution_clock::now();

    // The duration is cast to microseconds, so label it "us" (the old label
    // "ms" was off by a factor of 1000).
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    std::cout << "Finalized in " << duration.count() << " us" << std::endl;

    delete[] a;
    return 0;
}
Branching:
#include <iostream>
#include <chrono>
// Evaluates 2*left^3 + 3*right.
double firstOrder(double left, double right)
{
    const double cubic_part = 2.0 * left * left * left;
    const double linear_part = 3.0 * right;
    return cubic_part + linear_part;
}
// Evaluates 2*left^2 + 3*right^2.
double secondOrder(double left, double right)
{
    const double left_part = 2.0 * left * left;
    const double right_part = 3.0 * right * right;
    return left_part + right_part;
}
/**
 * Benchmark driver for the branching ("brute force") variant.
 *
 * Usage: ./a.out SIZE CHOICE
 *   SIZE    number of array elements; the timed loop runs SIZE * SIZE
 *           inner iterations
 *   CHOICE  1 calls firstOrder, 2 calls secondOrder
 *
 * Prints the elapsed time of the nested loop in microseconds.
 *
 * @return 0 on success, 1 when the command line is invalid.
 */
int main(int argc, char *argv[])
{
    // Both positional arguments are required; reading argv[1] / argv[2]
    // without this check is undefined behaviour when they are missing.
    if (argc < 3)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "a.out")
                  << " SIZE CHOICE(1|2)" << std::endl;
        return 1;
    }
    const int SIZE = atoi(argv[1]); // array size
    const int it = atoi(argv[2]);   // function choice
    if (SIZE < 0)
    {
        std::cerr << "SIZE must be non-negative" << std::endl;
        return 1;
    }
    // Any other choice would make the inner loop do nothing, so the reported
    // time would not measure either kernel.
    if (it != 1 && it != 2)
    {
        std::cerr << "CHOICE must be 1 or 2" << std::endl;
        return 1;
    }

    //initialize
    double *a = new double[SIZE];
    for (int i = 0; i < SIZE; i++)
        a[i] = static_cast<double>(i); // initialization
    std::cout << "Initialized" << std::endl;

    const auto start = std::chrono::high_resolution_clock::now();
    //loop
    for (int i = 0; i < SIZE; i++)
    {
        double g = 0.0;
        for (int j = 0; j < SIZE; j++)
        {
            // The choice is tested on every inner iteration, exactly as in
            // the original version of this loop.
            if (it == 1)
            {
                g += firstOrder(a[i], a[j]);
            }
            else if (it == 2)
            {
                g += secondOrder(a[i], a[j]);
            }
        }
        a[i] += g;
    }
    const auto stop = std::chrono::high_resolution_clock::now();
    // NOTE(review): the values written to `a` are never read after the timed
    // loop; an optimizer may be able to drop part of the work - confirm
    // before comparing timings.

    // The duration is cast to microseconds, so label it "us" (the old label
    // "ms" was off by a factor of 1000).
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    std::cout << "Finalized in " << duration.count() << " us" << std::endl;

    delete[] a;
    return 0;
}
Lambda expression:
#include <iostream>
#include <chrono>
#include<functional>
/**
 * Builds the interpolation callable selected by `kind`.
 *
 * @param kind 0 for 2*left^3 + 3*right, 1 for 2*left^2 + 3*right^2
 * @return a std::function wrapping the selected formula, or an empty
 *         std::function when `kind` is neither 0 nor 1. Invoking an empty
 *         std::function throws std::bad_function_call.
 */
std::function<double(double, double)> makeLambda(int kind)
{
    // `kind` is resolved once, here. The previous lambda captured `kind` and
    // re-tested it on every call, and fell off the end without returning a
    // value (undefined behaviour) for any kind other than 0 or 1.
    switch (kind)
    {
    case 0:
        return [](double left, double right) { return 2.0 * left * left * left + 3.0 * right; };
    case 1:
        return [](double left, double right) { return 2.0 * left * left + 3.0 * right * right; };
    default:
        return nullptr;
    }
}
/**
 * Benchmark driver for the std::function / lambda variant.
 *
 * Usage: ./a.out SIZE CHOICE
 *   SIZE    number of array elements; the timed loop makes SIZE * SIZE
 *           calls through the std::function
 *   CHOICE  1 selects makeLambda(0), 2 selects makeLambda(1)
 *
 * Prints the elapsed time of the nested loop in microseconds.
 *
 * @return 0 on success, 1 when the command line is invalid.
 */
int main(int argc, char *argv[])
{
    // Both positional arguments are required; reading argv[1] / argv[2]
    // without this check is undefined behaviour when they are missing.
    if (argc < 3)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "a.out")
                  << " SIZE CHOICE(1|2)" << std::endl;
        return 1;
    }
    const int SIZE = atoi(argv[1]);
    const int it = atoi(argv[2]);
    if (SIZE < 0)
    {
        std::cerr << "SIZE must be non-negative" << std::endl;
        return 1;
    }

    // Resolve the choice before any allocation so the error path has
    // nothing to clean up.
    std::function<double(double, double)> interp;
    switch (it)
    {
    case 1:
        interp = makeLambda(0);
        break;
    case 2:
        interp = makeLambda(1);
        break;
    default:
        // Previously `interp` stayed empty here and the first call in the
        // timed loop threw an uncaught std::bad_function_call.
        std::cerr << "CHOICE must be 1 or 2" << std::endl;
        return 1;
    }

    //initialize
    double *a = new double[SIZE];
    for (int i = 0; i < SIZE; i++)
        a[i] = static_cast<double>(i);
    std::cout << "Initialized" << std::endl;
    std::cout << "function" << std::endl;

    const auto start = std::chrono::high_resolution_clock::now();
    //loop
    for (int i = 0; i < SIZE; i++)
    {
        double g = 0.0;
        for (int j = 0; j < SIZE; j++)
        {
            g += interp(a[i], a[j]);
        }
        a[i] += g;
    }
    const auto stop = std::chrono::high_resolution_clock::now();

    // The duration is cast to microseconds, so label it "us" (the old label
    // "ms" was off by a factor of 1000).
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    std::cout << "Finalized in " << duration.count() << " us" << std::endl;

    delete[] a;
    return 0;
}