I have been developing a simple framework for embedded environments. I came to a design decision on whether to use virtual calls, CRTP, or maybe a switch statement. I have been told that vtables perform poorly in embedded.
Following up from this question
vftable performance penalty vs. switch statement
I decided to run my own test. I ran three different ways to call a member function.
- using the etl library's etl::function, a library meant to mimic the stl library but for embedded environments.(no dynamic allocations).
- using a master switch statement that will call an object's based on an object's int ID
- using a pure virtual call to a base class
I never tried this with a basic CRTP pattern but the etl::function was supposed to be a variation on that where that was the mechanism used for the pattern. The time I got on MSVC and similar performance on an ARM Cortex M4 was
- etl : 400 million nanoseconds
- switch : 420 million nanoseconds
- virtual: 290 million nanoseconds
The pure virtual calls are significantly faster. Am I missing something or are virtual calls just not as bad as people make them out to be. Here is the code used for the tests.
class testetlFunc
{
public:
uint32_t a;
testetlFunc() { a = 0; };
void foo();
};
class testetlFunc2
{
public:
uint32_t a;
testetlFunc2() { a = 0; };
virtual void foo() = 0;
};
void testetlFunc::foo()
{
a++;
}
class testetlFuncDerived : public testetlFunc2
{
public:
testetlFuncDerived();
void foo() override;
};
testetlFuncDerived::testetlFuncDerived()
{
}
void testetlFuncDerived::foo()
{
a++;
}
etl::ifunction<void>* timer1_callback1;
etl::ifunction<void>* timer1_callback2;
etl::ifunction<void>* timer1_callback3;
etl::ifunction<void>* timer1_callback4;
etl::ifunction<void>* etlcallbacks[4];
testetlFunc ttt;
testetlFunc ttt2;
testetlFunc ttt3;
testetlFunc ttt4;
testetlFuncDerived tttd1;
testetlFuncDerived tttd2;
testetlFuncDerived tttd3;
testetlFuncDerived tttd4;
testetlFunc2* tttarr[4];
static void MasterCallingFunction(uint16_t ID) {
switch (ID)
{
case 1:
ttt.foo();
break;
case 2:
ttt2.foo();
break;
case 3:
ttt3.foo();
break;
case 4:
ttt4.foo();
break;
default:
break;
}
};
int main()
{
tttarr[0] = (testetlFunc2*)&tttd1;
tttarr[1] = (testetlFunc2*)&tttd2;
tttarr[2] = (testetlFunc2*)&tttd3;
tttarr[3] = (testetlFunc2*)&tttd4;
etl::function_imv<testetlFunc, ttt, &testetlFunc::foo> k;
timer1_callback1 = &k;
etl::function_imv<testetlFunc, ttt2, &testetlFunc::foo> k2;
timer1_callback2 = &k2;
etl::function_imv<testetlFunc, ttt3, &testetlFunc::foo> k3;
timer1_callback3 = &k3;
etl::function_imv<testetlFunc, ttt4, &testetlFunc::foo> k4;
timer1_callback4 = &k4;
etlcallbacks[0] = timer1_callback1;
etlcallbacks[1] = timer1_callback2;
etlcallbacks[2] = timer1_callback3;
etlcallbacks[3] = timer1_callback4;
//results for etl::function --------------
int rng;
srand(time(0));
StartTimer(1)
for (uint32_t i = 0; i < 2000000; i++)
{
rng = rand() % 4 + 0;
for (uint16_t j= 0; j < 4; j++)
{
(*etlcallbacks[rng])();
}
}
StopTimer(1)
//results for switch --------------
StartTimer(2)
for (uint32_t i = 0; i < 2000000; i++)
{
rng = rand() % 4 + 0;
for (uint16_t j = 0; j < 4; j++)
{
MasterCallingFunction(rng);
}
}
StopTimer(2)
//results for virtual vtable --------------
StartTimer(3)
for (uint32_t i = 0; i < 2000000; i++)
{
rng = rand() % 4 + 0;
for (uint16_t j = 0; j < 4; j++)
{
tttarr[rng]->foo();
//ttt.foo();
}
}
StopTimer(3)
PrintAllTimerDuration
}