In order to quantify the difference in performance of a C-like array and Vectors in C++, I wrote this little program. https://github.com/rajatkhanduja/Benchmarks/blob/master/C%2B%2B/vectorVsArray.cpp
To compare them on common grounds, I decided to test both for random and sequential access. I added iterators, just to compare them as well (but that is not what the question focusses on).
The results, for a 64-bit Linux machine with 7.7 GB RAM with on an array/vector size of 1 million are as follows:-
- Time taken to write to array. : 12.0378 ms
- Time taken to read from array sequentially. : 2.48413 ms
- Time taken to read from array randomly. : 37.3931 ms
- Time taken to write to dynamic array. : 11.7458 ms
- Time taken to read from dynamic array sequentially. : 2.85107 ms
- Time taken to read from dynamic array randomly. : 36.0579 ms
- Time taken to write to vector using indices. : 11.3909 ms
- Time taken to read from vector using indices, sequentially. : 4.09106 ms
- Time taken to read from vector using indices, randomly. : 39 ms
- Time taken to write to vector using iterators. : 24.9949 ms
- Time taken to read from vector using iterators. : 18.8049 ms
The vector's size is set at the time of initialization and not altered, so there is no resizing of the vector (the assertion in the program helps verify that). The times don't include the initialization times of any of the statically allocated array, dynamically allocated array or the vector.
According to the statistics, the time to write to Vector is lesser than that of array but the time to read from vector is twice as much as that of array.
The difference is pretty small, but is there an explanation as to why there is a performance difference ? Is there something wrong with the test ? I expected both to perform at the same speed. The repetition of this test shows the same trend.
The code:
#include <vector>
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <sys/time.h>
#include <cassert>
#define ARR_SIZE 1000000
using std::string;
void printtime (struct timeval& start, struct timeval& end, string str);
int main (void)
{
int arr[ARR_SIZE];
int tmp;
struct timeval start, stop;
srand (time (NULL));
/* Writing data to array */
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
arr[i] = rand();
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to write to array."));
/* Reading data from array */
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
tmp = arr[i];
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from array sequentially."));
/* Reading data from array randomly*/
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
tmp = arr[rand() % ARR_SIZE];
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from array randomly."));
int *darr = (int *) calloc (sizeof (int), ARR_SIZE);
/* Writing data to array */
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
darr[i] = rand();
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to write to dynamic array."));
/* Reading data from array */
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
tmp = darr[i];
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from dynamic array sequentially."));
/* Reading data from dynamic array randomly*/
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
tmp = darr[rand() % ARR_SIZE];
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from dynamic array randomly."));
std::vector<int> v(ARR_SIZE);
assert (v.capacity() == ARR_SIZE);
/* Writing to vector using indices*/
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
v[i] = rand();
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to write to vector using indices."));
assert (v.capacity() == ARR_SIZE);
/* Reading from vector using indices*/
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
tmp = v[i];
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from vector using indices, sequentially."));
/* Reading data from dynamic array randomly*/
gettimeofday (&start, NULL);
for (int i = 0; i < ARR_SIZE; i++)
{
tmp = v[rand() % ARR_SIZE];
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from vector using indices, randomly."));
std::vector<int> v2(ARR_SIZE);
/* Writing to vector using iterators*/
gettimeofday (&start, NULL);
std::vector<int>::iterator itr, itr_end;
for (itr = v2.begin(), itr_end = v2.end(); itr != itr_end; itr++)
{
*itr = rand();
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to write to vector using iterators."));
/* Reading from vector using iterators*/
gettimeofday (&start, NULL);
for (itr = v2.begin(), itr_end = v2.end(); itr != itr_end; itr++)
{
tmp = *itr;
}
gettimeofday (&stop, NULL);
printtime (start, stop, string ("Time taken to read from vector using iterators."));
return 0;
}
void printtime (struct timeval& start, struct timeval& end, string str)
{
double start_time, end_time, diff;
start_time = ((start.tv_sec) * 1000 + start.tv_usec/1000.0);
end_time = ((end.tv_sec) * 1000 + end.tv_usec/1000.0);
diff = end_time - start_time;
std::cout << str << " : " << diff << " ms" << std::endl;
}
EDIT
As suggested in comments, here is more information :-
- Compiler :- g++ - 4.5.2
- Flags :- none (i.e. default values)
- Optimizations :- None (I wanted to test the behavior in a usual setting. Optimization could vary the behavior of the program, for instance since the variable tmp is never used, the step of reading the vector/array may altogether be skipped or reduced to just the last assignment. At least that's what I understand).