The class `A` has a template member function `A::runImpl`. The function `A::run` calls a specialized implementation based on the value of `A::m_case`, which is set within the class constructor.
In my project, the `run` function is called very frequently. Eliminating the branch inside it would give a speedup of more than 5%. Is there a template technique that can do this?
My project is compiled by GCC 7.3.1 with C++14.
#include <iostream>
#include <cstdlib>
#include <cassert>
using namespace std;
/// Selects one of several execution paths based on a value computed once at
/// construction time.
///
/// The constructor sums the input array into m_case; run() uses that value to
/// choose which runImpl<N> specialization to call.
class A {
public:
    /// Computes the case selector as the sum of the first @p len elements of
    /// @p arr.
    ///
    /// @param arr  Array of at least @p len values. It is only read, so it is
    ///             taken as pointer-to-const; callers may pass const or
    ///             non-const arrays. May be null only when @p len is 0.
    /// @param len  Number of elements of @p arr to sum.
    A(const uint32_t* arr, size_t len) : m_case(0) {
        // Dereferencing a null array would be undefined behaviour; make the
        // precondition explicit in debug builds.
        assert(arr != nullptr || len == 0);
        for (size_t i = 0; i < len; ++i) {
            m_case += arr[i];
        }
    }

    /// Primary template: used for any case value without an explicit
    /// specialization. Reaching it is treated as a programming error, so it
    /// asserts. When NDEBUG is defined the assert expands to nothing and the
    /// call does nothing.
    template <size_t> void runImpl() { assert(0); }

    /// Runs the implementation selected by m_case.
    void run();

private:
    size_t m_case;  ///< Sum of the constructor input; selects the execution path.
};
/// Specialization for case 0: the default execution path.
///
/// Ends the line with '\n' instead of std::endl so that each call does not
/// force a flush of the output stream; the bytes written are unchanged.
template <>
inline void A::runImpl<0>() {
    std::cout << "Default execution path." << '\n';
}
/// Specialization for case 1: the first optimized execution path.
///
/// Ends the line with '\n' instead of std::endl so that each call does not
/// force a flush of the output stream; the bytes written are unchanged.
template <>
inline void A::runImpl<1>() {
    std::cout << "Optimized execution path 1." << '\n';
}
/// Specialization for case 2: the second optimized execution path.
///
/// Ends the line with '\n' instead of std::endl so that each call does not
/// force a flush of the output stream; the bytes written are unchanged.
template <>
inline void A::runImpl<2>() {
    std::cout << "Optimized execution path 2." << '\n';
}
/// Dispatches to the runImpl specialization that matches m_case.
///
/// Values 1 and 2 select their dedicated specializations; every other value
/// falls back to runImpl<0>.
void A::run() {
    if (m_case == 1) {
        runImpl<1>();
        return;
    }
    if (m_case == 2) {
        runImpl<2>();
        return;
    }
    // Any value other than 1 or 2 takes the default path.
    runImpl<0>();
}
int main() {
uint32_t arr[] = {1, 1};
A a(arr, 2);
a.run();
return 0;
}