I created a struct of three integers and overloaded operator*= for it, then created a vector of 100000000 such structs and filled every field randomly with 0 or 1.
I then tested two versions of a function that sums the elements multiplied by 2. The version that uses the overloaded operator*= shows almost 60% overhead compared to the one that does not.
Any guess what is happening?
Tested on:
- Windows 10
- Visual Studio 2019 (MSVC 14.29.30133)
- Project Configuration: Release x64 (O2, Whole Program Optimization enabled).
Thanks
Check main, without_overloading and with_overloading functions.
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <random>
#include <vector>
/*
I took the code for calculating times from this post:
https://stackoverflow.com/questions/17432502/how-can-i-measure-cpu-time-and-wall-clock-time-on-both-linux-windows/17440673#17440673
*/
// Windows
#ifdef _WIN32
#include <Windows.h>
double get_wall_time() {
LARGE_INTEGER time, freq;
if (!QueryPerformanceFrequency(&freq)) {
// Handle error
return 0;
}
if (!QueryPerformanceCounter(&time)) {
// Handle error
return 0;
}
return (double)time.QuadPart / freq.QuadPart;
}
double get_cpu_time() {
FILETIME a, b, c, d;
if (GetProcessTimes(GetCurrentProcess(), &a, &b, &c, &d) != 0) {
// Returns total user time.
// Can be tweaked to include kernel times as well.
return
(double)(d.dwLowDateTime |
((unsigned long long)d.dwHighDateTime << 32)) * 0.0000001;
}
else {
// Handle error
return 0;
}
}
// Posix/Linux
#else
#include <time.h>
#include <sys/time.h>
// Returns the current time of day from gettimeofday() as fractional seconds,
// or 0 if the call reports an error.
double get_wall_time() {
    struct timeval now;
    const int status = gettimeofday(&now, nullptr);
    if (status != 0) {
        // Handle error
        return 0;
    }
    const double whole_seconds = static_cast<double>(now.tv_sec);
    const double microseconds = static_cast<double>(now.tv_usec);
    return whole_seconds + microseconds * .000001;
}
// Returns the processor time reported by clock(), converted to seconds.
double get_cpu_time() {
    const clock_t ticks = clock();
    return static_cast<double>(ticks) / CLOCKS_PER_SEC;
}
#endif
//=================================================================================
// Number of vec elements the benchmark vector is created with (one hundred million).
const size_t size = 100'000'000;
// Plain aggregate of three integer components.
struct vec {
    int x;
    int y;
    int z;

    // Scales every component by rhs in place and returns *this so that the
    // result can be chained or used as an lvalue.
    vec& operator*=(int rhs) {
        x = x * rhs;
        y = y * rhs;
        z = z * rhs;
        return *this;
    }
};
void without_overloading(std::vector<vec> const &points) {
double wall0 = get_wall_time();
double cpu0 = get_cpu_time();
vec sum{ 0,0,0 };
for (size_t i = 0; i < size; ++i) {
sum.x += points[i].x * 2;
sum.y += points[i].y * 2;
sum.z += points[i].z * 2;
}
double wall1 = get_wall_time();
double cpu1 = get_cpu_time();
std::cout << "Wall Time = " << wall1 - wall0 << std::endl;
std::cout << "CPU Time = " << cpu1 - cpu0 << std::endl;
std::cout << sum.x << " " << sum.y << " " << sum.z << std::endl;
}
void with_overloading(std::vector<vec> &points) {
double wall0 = get_wall_time();
double cpu0 = get_cpu_time();
vec sum{ 0,0,0 };
for (size_t i = 0; i < size; ++i) {
points[i] *= 2;
sum.x += points[i].x;
sum.y += points[i].y;
sum.z += points[i].z;
}
double wall1 = get_wall_time();
double cpu1 = get_cpu_time();
std::cout << "Overloading Wall Time = " << wall1 - wall0 << std::endl;
std::cout << "Overloading CPU Time = " << cpu1 - cpu0 << std::endl;
std::cout << sum.x << " " << sum.y << " " << sum.z << std::endl;
}
int main() {
std::srand(std::time(nullptr));
std::vector<vec> points(size);
for (int i = 0; i < size; ++i) {
points[i] = { rand() % 2, rand() % 2, rand() % 2 };
}
without_overloading(points);
with_overloading(points);
}