I always believed that the function below foo2
is faster than foo3
untile after a test.
All code below:
#include <iostream>
#include <boost/timer.hpp>
#include <boost/lexical_cast.hpp>
#include <stdint.h>
struct session {
bool operator==(const session& r) const;
uint8_t proto;
uint16_t sport;
uint16_t dport;
uint32_t sip;
uint32_t dip;
};
bool session::operator==(const session& r) const {
return proto == r.proto && sport == r.sport && dport == r.dport
&& sip == r.sip && dip == r.dip;
}
// my L1,L2,L3 total cache size is 16MB, so set it 32MB to overflow all 16MB caches.
static const int SIZE = 32 * 1024 * 1024 / sizeof(session);
int sum;
void foo1(session* p) {
session s = {1, 2, 3, 4, 5};
for (int i = 0; i < SIZE; i++)
if (p[i] == s)
sum++;
}
void foo2(session* p) {
session s = {1, 2, 3, 4, 5};
int n = SIZE - SIZE % 4;
int i;
for (i = 0; i < n; i += 4) {
if (p[i + 0] == s)
sum++;
if (p[i + 1] == s)
sum++;
if (p[i + 2] == s)
sum++;
if (p[i + 3] == s)
sum++;
}
/*
for (; i < SIZE; i++)
if (p[i] == s)
sum++;
*/
}
void foo3(session* p) {
session s = {1, 2, 3, 4, 5};
int n = SIZE - SIZE % 4;
int i;
for (i = 0; i < n; i += 4) {
if (p[i + 0] == s)
sum++;
else if (p[i + 1] == s)
sum++;
else if (p[i + 2] == s)
sum++;
else if (p[i + 3] == s)
sum++;
}
/*
for (; i < SIZE; i++)
if (p[i] == s)
sum++;
*/
}
int main(int argc, char* argv[]) {
if (argc < 2)
return -1;
int n = boost::lexical_cast<int>(argv[1]);
session* p = new session[SIZE];
boost::timer t;
for (int i = 0; i < n; i++)
foo1(p);
std::cout << t.elapsed() << std::endl;
t.restart();
for (int i = 0; i < n; i++)
foo2(p);
std::cout << t.elapsed() << std::endl;
t.restart();
for (int i = 0; i < n; i++)
foo3(p);
std::cout << t.elapsed() << std::endl;
delete [] p;
return 0;
}
test 1000 times, ./a.out 1000
output:
4.36
3.98
3.96
My machine:
CPU: Intel(R) Xeon(R) CPU E5-2420 0 @ 1.90GHz
Caches:
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 15360K
In the test, foo2
and foo3
have equivalence performance. Due to foo2
could case
CPU execut all unrolled expressions in parallel, so foo3
is the same. It that right? If so, the else if
syntax is violate the C/C++ basic else if
semantics.
Is there someone explain it? Thanks a lot.
Update
My compiler is gcc 4.4.6 ins RedHat
g++ -Wall -O2 a.cpp