I was playing around to speed my code up and by accident found out that one can initialize a vector, manually increase its capacity, then set its elements by index to a desired value instead of the regular push_back()
. Here is an instance for storing elements from 0 to N in a vector:
std::vector<int> vec;
vec.reserve(N);
for (int = 0; i < N; ++i) {
vec[i] = i; // instead of vec.push_back(i);
}
I found this method to be significantly faster compared to using push_back()
(1663 ms vs 3918 ms using N=1e9 on my machine). On the other hand, I was told it was not legal code and would leave me with a broken vector instance. Though when I checked, all the elements were correct.
Is this a capital offense? Or should it be avoided only because accessing a value at an index would give garbage unless it had been set beforehand?
Thanks in advance!
P.S.: I am aware that one could initialize the vector with std::vector<int> vec(N)
and would not have to fear of accessing invalid data. It is not as fast tought.
Edit 1:
- As advised in a comment I tried to store strings and it threw an error, as one would expect it.
- Then I also tried to use integers but this time with debugging and that also throw an error! So it seems that using release mode (Visual Studi Community 2019) with integers does not trigger an error.
- Another commenter was right, the size remains 0 (checked by printing the size with release mode and integer container).
- And using
resize()
made it valid, as told by yet another commenter.
Here is the abomination that I used:
#include <chrono>
#include <iostream>
#include <vector>
double measure_vector_without_reserve(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
std::vector<int> vec;
for (int i = 0; i < elements_to_load; ++i) {
vec.push_back(i);
}
vec[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the vector took '" << duration << "' ms.\n";
return double(duration);
}
double measure_vector_with_reserve(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
std::vector<int> vec;
vec.reserve(elements_to_load);
for (int i = 0; i < elements_to_load; ++i) {
vec[i] = i;
}
vec[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the vector after reserving space took '" << duration << "' ms.\n";
return double(duration);
}
double measure_vector_initialized_with_size(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
std::vector<int> vec(elements_to_load);
for (int i = 0; i < elements_to_load; ++i) {
vec[i] = i;
}
vec[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the vector initialized with size '" << duration << "' ms.\n";
return double(duration);
}
double measure_vector_initialized_with_size_and_value(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
std::vector<int> vec(elements_to_load, 0);
for (int i = 0; i < elements_to_load; ++i) {
vec[i] = i;
}
vec[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the vector initialized with size and value took '" << duration << "' ms.\n";
return double(duration);
}
double measure_array_c_style(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
int *arr = new int[elements_to_load];
for (int i = 0; i < elements_to_load; ++i) {
arr[i] = i;
}
arr[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the array took '" << duration << "' ms.\n";
delete arr;
return double(duration);
}
double measure_vector_with_reserve_push_back(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
std::vector<int> vec;
vec.reserve(elements_to_load);
for (int i = 0; i < elements_to_load; ++i) {
vec.push_back(i);
}
vec[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the vector with .push_back() after reserving space took '" << duration << "' ms.\n";
return double(duration);
}
double measure_vector_with_reserve_emplace_back(int elements_to_load)
{
auto t1 = std::chrono::high_resolution_clock::now();
std::vector<int> vec;
vec.reserve(elements_to_load);
for (int i = 0; i < elements_to_load; ++i) {
vec.emplace_back(i);
}
vec[int(elements_to_load / 2)] = 500;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Filling up the vector with .emplace_back() after reserving space took '" << duration << "' ms.\n";
return double(duration);
}
int main(void)
{
int elements_to_load = 1000000000; // 1e9
int iterations = 10;
// Measure multiple times in different order, so boosted CPU clocks play less factor
std::vector<double> results1;
std::vector<double> results2;
std::vector<double> results3;
std::vector<double> results4;
std::vector<double> results5;
std::vector<double> results6;
std::vector<double> results7;
// Play one to warm up the engine
measure_vector_without_reserve(elements_to_load);
for (int i = 0; i != iterations; ++i) {
results1.push_back(measure_vector_without_reserve(elements_to_load));
results2.push_back(measure_vector_with_reserve(elements_to_load));
results3.push_back(measure_vector_initialized_with_size(elements_to_load));
results4.push_back(measure_vector_initialized_with_size_and_value(elements_to_load));
results5.push_back(measure_array_c_style(elements_to_load));
results6.push_back(measure_vector_with_reserve_push_back(elements_to_load));
results7.push_back(measure_vector_with_reserve_emplace_back(elements_to_load));
}
auto avg = [&](std::vector<double> vec) -> double {
double sum = 0;
for (double elem : vec) {
sum += elem;
}
return sum / static_cast<double>(vec.size());
};
std::cout << "'" << elements_to_load << "' elements set to a vector without reserving space took '" << avg(results1)<< "' ms. on average\n";
std::cout << "'" << elements_to_load << "' elements set to a vector with [] after reserving space took '" << avg(results2)<< "' ms. on average\n";
std::cout << "'" << elements_to_load << "' elements set to a vector after initializing with size took '"<< avg(results3) << "' ms. on average\n";
std::cout << "'" << elements_to_load << "' elements set to a vector after initializing with size and value took '"<< avg(results4) << "' ms. on average\n";
std::cout << "'" << elements_to_load << "' elements set to a C style array took '" << avg(results5)<< "' ms. on average\n";
std::cout << "'" << elements_to_load << "' elements set to a vector with .push_back() after reserving space took '" << avg(results6)<< "' ms. on average\n";
std::cout << "'" << elements_to_load << "' elements set to a vector with .emplace_back() after reserving space took '" << avg(results7)<< "' ms. on average\n";
return 0;