I ran into an issue while trying to understand the difference, in terms of computational time, between re-allocating a structure each time it is needed versus allocating it once up front and then re-filling it (a sort of reset to default values). On Windows and Ubuntu (WSL) I get similar results, i.e., much more time when I RE-allocate; on Mac things change.
Here's the code:
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <iostream>
#include <limits>
#include <numeric>
#include <random>
#include <thread>
#include <vector>
using namespace std;
// Elapsed time between two time points, in seconds.
template <typename T>
inline double getMs(T start, T end) {
    return double(std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()) / 1000;
}
// Allocate every structure once, then repeatedly reset it to its default values.
void pre_allocation(int size, int max_k) {
    vector<int> tau_star(size, 1440 * 3);
    tau_star.shrink_to_fit();
    vector<vector<int>> tau;
    tau.reserve(max_k);
    vector<vector<int>> pred_trip;
    pred_trip.reserve(max_k);
    vector<vector<int>> pred_stop;
    pred_stop.reserve(max_k);
    for (int k = 0; k < max_k; ++k) {
        pred_trip.emplace_back(size, -1);
        // int vectors, so use int's max (size_t's max does not fit in an int)
        pred_stop.emplace_back(size, std::numeric_limits<int>::max());
        tau.emplace_back(size, 1440 * 3);
    }
    for (int i = 0; i < size; i++) {
        std::fill(tau_star.begin(), tau_star.end(), 1440 * 3);
        for (int k = 0; k < max_k; ++k) {
            std::fill(tau[k].begin(), tau[k].end(), 1440 * 3);
            std::fill(pred_trip[k].begin(), pred_trip[k].end(), -1);
            std::fill(pred_stop[k].begin(), pred_stop[k].end(), std::numeric_limits<int>::max());
        }
    }
}
// Build all the structures from scratch at every iteration.
void re_allocation(int size, int max_k) {
    for (int i = 0; i < size; i++) {
        vector<int> tau_star(size, 1440 * 3);
        tau_star.shrink_to_fit();
        vector<vector<int>> tau;
        tau.reserve(max_k);
        vector<vector<int>> pred_trip;
        pred_trip.reserve(max_k);
        vector<vector<int>> pred_stop;
        pred_stop.reserve(max_k);
        for (int k = 0; k < max_k; ++k) {
            pred_trip.emplace_back(size, -1);
            pred_stop.emplace_back(size, std::numeric_limits<int>::max());
            tau.emplace_back(size, 1440 * 3);
        }
    }
}
int main() {
    int size = 107333;
    int max_k = 3;

    auto start_pre_alloc = std::chrono::high_resolution_clock::now();
    pre_allocation(size, max_k);
    double elapsed_pre_alloc = getMs(start_pre_alloc, std::chrono::high_resolution_clock::now());

    auto start_re_alloc = std::chrono::high_resolution_clock::now();
    re_allocation(size, max_k);
    double elapsed_re_alloc = getMs(start_re_alloc, std::chrono::high_resolution_clock::now());

    printf("Time in pre-allocation: %.3f sec\n", elapsed_pre_alloc);
    printf("Time in RE-allocation: %.3f sec\n", elapsed_re_alloc);
    return 0;
}
These structures are actually used in a larger piece of software, but I needed a small example to understand what happens.
The results are:
Windows:
Time in pre-allocation: 11.617 sec
Time in RE-allocation: 53.679 sec
Ubuntu (WSL):
Time in pre-allocation: 15.749 sec
Time in RE-allocation: 81.905 sec
Mac:
Time in pre-allocation: 9.396 sec
Time in RE-allocation: 12.408 sec
The specs of my Windows machine are:
- CPU - 11th Gen Intel(R) Core(TM) i7-11700KF @ 3.60GHz
- RAM - 16 GB DDR4
- OS - Windows 11
- Compiler - MS Visual Studio 2022
The Mac is a MacBook Pro 2018:
- CPU - 6-core Intel Core i9 @ 2.9 GHz
- RAM - 16GB 2400 MHz DDR4
- macOS Big Sur Version 11.6.5
On Windows I compile with Visual Studio, while on Ubuntu and Mac I use the following Makefile:
# Directory for my files
MYHOME = ${PWD}
BIN = ${MYHOME}/bin
LIB = ${MYHOME}/lib
SRC = ${MYHOME}
# For Linux:
# OPTFLAG = -O2 -ffast-math -march=native -DNDEBUG -Wall -std=c++17 -DLINUX -Wall
# For Mac:
OPTFLAG = -O2 -ffast-math -DNDEBUG -Wall -std=c++17 -DLINUX -Wall
LDFLAGS = -O2 -DNDEBUG -lm -pthread -std=c++17
COMPILER = g++ ${OPTFLAG}
LINKER = g++ ${LDFLAGS}
# Directory for output files
OUT_DIR=bin lib
# Command line tool
cli: ${OUT_DIR} ${SRC}/main.cpp
	${COMPILER} -c ${SRC}/main.cpp -o ${LIB}/main.o
	${LINKER} -o ${BIN}/main ${LIB}/main.o
Is it possible that the compiler on Mac understands that it can allocate only once and just re-fill? Why does this happen?
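If it helps, one way I imagine this could be checked (I haven't tried it yet, so take it only as a sketch) is to count the calls to the global operator new around the two patterns: if the optimizer really turns the repeated allocations into a single one (or removes them), the counter should stay low. The counter name g_allocs and the sizes below are just placeholders for the sketch, not part of my real code:

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <new>
#include <vector>

// Count every allocation that goes through the global operator new.
// Sketch only: not thread-safe, and it counts allocations from the whole program.
static long long g_allocs = 0;

void* operator new(std::size_t n) {
    ++g_allocs;
    if (void* p = std::malloc(n)) return p;
    throw std::bad_alloc();
}
void operator delete(void* p) noexcept { std::free(p); }
void operator delete(void* p, std::size_t) noexcept { std::free(p); }

int main() {
    const int size = 1000, iterations = 1000;

    // Pattern 1: allocate once, then only re-fill.
    g_allocs = 0;
    std::vector<int> pre(size, 0);
    for (int i = 0; i < iterations; i++)
        std::fill(pre.begin(), pre.end(), -1);
    std::printf("pre-allocation: %lld calls to operator new\n", g_allocs);

    // Pattern 2: build a fresh vector at every iteration.
    g_allocs = 0;
    for (int i = 0; i < iterations; i++) {
        std::vector<int> tmp(size, -1);
        (void)tmp;
    }
    std::printf("re-allocation:  %lld calls to operator new\n", g_allocs);
    return 0;
}

Of course, replacing operator new might itself change what the optimizer is allowed to do with the allocations, so I'm not sure this would be a completely fair test.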