There will be a large difference between:
// Addition expressed through the overloaded operator+.
Complex a(1.0, 2.0), b(1.1, 2.2);
Complex c;
c = a + b;
and:
// The same addition written out member by member, with no operator call.
Complex a(1.0, 2.0), b(1.1, 2.2);
Complex c;
c.re = a.re + b.re;
c.im = a.im + b.im;
if the Complex operator+(Complex lhs, Complex rhs);
function cannot be inlined. However, if the compiler can inline the code, there will be no difference at all.
So it depends very much on the overhead of the call to the operator relative to the cost of the work the operator actually does. Doing std::string operator+(const std::string& lhs, const std::string& rhs);
with a pair of strings that are each several kilobytes long will have very little relative overhead in the call to the operator+
, compared to the adding of double
or float
numbers in the complex math case above. For short strings, the overhead is much larger, in relative terms.
Here is an example:
oper.cpp:
#include <cstdlib>
#include <iostream>
#include <vector>

#include "Complex.h"
#include "Complex2.h"
// Number of elements in each benchmark array.
constexpr int SIZE = 80000;
// Overwrites the first SIZE elements of `arr`; each element is constructed
// from the results of two rand() calls.
template<typename CPLX>
void Fill(CPLX* arr)
{
    CPLX* const stop = arr + SIZE;
    for (CPLX* cur = arr; cur != stop; ++cur)
    {
        *cur = CPLX(rand(), rand());
    }
}
// Adds up re + im over the first SIZE elements of `arr` and prints the total
// as "Sum:<value>".
// Every element contributes to the printed value, so the results stored by
// the caller are all observably used (presumably this is what keeps the
// optimizer from discarding the benchmarked work).
template<typename CPLX>
void DummyFunc(CPLX* arr)
{
    double sum = 0.0;  // start from a defined value
    for(int i = 0; i < SIZE; i++)
    {
        // Accumulate; a plain assignment would keep only the last element.
        sum += arr[i].re + arr[i].im;
    }
    std::cout << "Sum:" << sum << std::endl;
}
// Executes the x86 RDTSC instruction and returns its result as one 64-bit
// value: the "d" output register supplies the upper 32 bits and the "a"
// output register the lower 32 bits.
static __inline__ unsigned long long rdtsc(void)
{
    unsigned low_bits, high_bits;
    __asm__ __volatile__ ("rdtsc" : "=a"(low_bits), "=d"(high_bits));
    const unsigned long long upper = static_cast<unsigned long long>(high_bits) << 32;
    return static_cast<unsigned long long>(low_bits) | upper;
}
void RunFunc1()
{
Complex arr1[SIZE];
Complex arr2[SIZE];
Complex arr3[SIZE];
Fill(arr1);
Fill(arr2);
unsigned long long t = rdtsc();
for(int i = 0; i < SIZE; i++)
{
arr3[i] = arr2[i] + arr1[i];
}
t = rdtsc() - t;
std::cout << "Time=" << t << std::endl;
DummyFunc(arr3);
}
void RunFunc2()
{
Complex2 arr1[SIZE];
Complex2 arr2[SIZE];
Complex2 arr3[SIZE];
Fill(arr1);
Fill(arr2);
unsigned long long t = rdtsc();
for(int i = 0; i < SIZE; i++)
{
arr3[i] = arr2[i] + arr1[i];
}
t = rdtsc() - t;
std::cout << "Time=" << t << std::endl;
DummyFunc(arr3);
}
void RunFunc3()
{
Complex2 arr1[SIZE];
Complex2 arr2[SIZE];
Complex2 arr3[SIZE];
Fill(arr1);
Fill(arr2);
unsigned long long t = rdtsc();
for(int i = 0; i < SIZE; i++)
{
arr3[i].im = arr2[i].im + arr1[i].im;
arr3[i].re = arr2[i].re + arr1[i].re;
}
t = rdtsc() - t;
std::cout << "Time=" << t << std::endl;
DummyFunc(arr3);
}
// Runs the three benchmark variants one after another.
int main(int argc, char **argv)
{
    // The command-line arguments are not used.
    (void)argc;
    (void)argv;

    RunFunc1();
    RunFunc2();
    RunFunc3();
    return 0;
}
Complex.h:
// Simple value type with two public double members, re and im.
class Complex
{
public:
    double re;  // presumably the real part
    double im;  // presumably the imaginary part

    // Leaves re and im uninitialized.
    Complex() {}

    // Sets re to r and im to i.
    Complex(double r, double i) : re(r), im(i) {}
};
// Addition for Complex. Only declared in this header; the definition is in
// Complex.cpp, so code that includes this header sees no function body.
Complex operator+(Complex lhs, Complex rhs);
Complex2.h:
// Simple value type with two public double members, re and im.
class Complex2
{
public:
    double re;  // presumably the real part
    double im;  // presumably the imaginary part

    // Leaves re and im uninitialized.
    Complex2() {}

    // Sets re to r and im to i.
    Complex2(double r, double i) : re(r), im(i) {}
};
// Returns the member-wise sum of lhs and rhs.
// The body lives in the header so that every includer can see it.
// `inline` is required for that: without it, each translation unit that
// includes this header emits its own external definition and the link fails
// with a duplicate-symbol error.
inline Complex2 operator+(Complex2 lhs, Complex2 rhs)
{
    return Complex2(lhs.re + rhs.re, lhs.im + rhs.im);
}
Complex.cpp:
#include "Complex.h"
// Out-of-line addition for Complex: returns the member-wise sum of lhs and rhs.
Complex operator+(Complex lhs, Complex rhs)
{
    const double re_sum = lhs.re + rhs.re;
    const double im_sum = lhs.im + rhs.im;
    return Complex(re_sum, im_sum);
}
Build with:
clang++ -O2 -c Complex.cpp
clang++ -O2 oper.cpp Complex.o -std=c++11
Results look something like this:
Time=3265033
Sum:7.09575e+09
Time=1728941
Sum:6.72663e+09
Time=1705123
Sum:2.92692e+09
As you can see, the first time is about 2x that of the other two solutions. This is because the compiler cannot inline the Complex.cpp
code when it is in a separate translation unit (compiled separately).