An event/task instance is created based on some incoming data in a thread and is sent to be processed in another thread. I'm trying to measure different implementations of processing those events in the second thread (I'd like to have static polymorphism).
I took and modified some benchmarks from std::variant
vs. inheritance vs. other ways (performance). To still better see the difference between approaches I introduced the REPEAT
macro. Now I have 2 questions:
- Using
#define REPEAT(x) x
i.e. without repeat, why do all solutions give similar results (I'd exect CRTP and Concepts approach to be the winning one)?
-----------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------
Switch 0.894 ns 0.894 ns 806843294
SwitchIndex 1.28 ns 1.28 ns 557002436
Virtual 1.24 ns 1.24 ns 578021842
VirtualConstexpr 1.39 ns 1.39 ns 501709273
VirtualFinal 1.19 ns 1.19 ns 596009101
CRTP 1.38 ns 1.38 ns 502987064
CRTPVisitor 1.62 ns 1.62 ns 427966003
Concepts 1.64 ns 1.64 ns 431103629
FunctionPointerList 2.41 ns 2.41 ns 284035164
GetIf 1.35 ns 1.35 ns 574041872
HoldsAlternative 1.70 ns 1.70 ns 359107896
VisitorConstexpr 1.41 ns 1.41 ns 501415083
VisitorStruct 1.40 ns 1.40 ns 496493666
VisitorStructConstexpr 1.39 ns 1.39 ns 469352143
VisitorOverload 1.40 ns 1.40 ns 502173810
- Using
#define REPEAT(x) REPEAT64(x)
gives the results shown below. Why have theCRTP
andConcepts
approaches similar performance as theVirtual
approach? Also, is theSwitch
approach really the most efficient one in a hotwhile
loop?
-----------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------
Switch 1.50 ns 1.50 ns 407931251
SwitchIndex 2.06 ns 2.05 ns 328622920
Virtual 107 ns 107 ns 5534355
VirtualConstexpr 86.3 ns 86.3 ns 8089307
VirtualFinal 159 ns 159 ns 6023758
CRTP 87.0 ns 87.0 ns 7970289
CRTPVisitor 83.5 ns 83.5 ns 7844207
Concepts 94.4 ns 94.4 ns 8334785
FunctionPointerList 168 ns 168 ns 4223302
GetIf 1.88 ns 1.88 ns 387054068
HoldsAlternative 2.58 ns 2.58 ns 410366402
VisitorConstexpr 90.1 ns 90.1 ns 8305487
VisitorStruct 97.8 ns 97.8 ns 7363107
VisitorStructConstexpr 83.0 ns 83.0 ns 8009493
VisitorOverload 84.3 ns 84.3 ns 8035098
Benchmark code:
/*
* Altered from https://stackoverflow.com/questions/57726401/stdvariant-vs-inheritance-vs-other-ways-performance
* TODO:
* type erasure idiom
* boost variant
* Mach7 pattern matching
*/
#include <array>
#include <variant>
#include <functional>
#include <benchmark/benchmark.h>
#include "util/macro.hpp"
#include "util/random.hpp"
#define REPEAT(x) REPEAT64(x)
constexpr int NUMBER_OBJECTS = 10;
benchmarks::util::RandomInInterval random_cls(0, 10);
template<class... Ts> struct overload : Ts... { using Ts::operator()...; };
template<class... Ts> overload(Ts...) -> overload<Ts...>;
template <std::size_t N>
std::array<int, N> get_random_array() {
std::array<int, N> item{};
for (int i = 0 ; i < N; i++)
item[i] = random_cls.get_random_int();
return item;
}
template <typename T, std::size_t N>
std::array<T, N> get_random_objects(std::function<T(decltype(random_cls.get_random_int()))> func) {
std::array<T, N> a{};
std::generate(a.begin(), a.end(), [&] {
return func(random_cls.get_random_int());
});
return a;
}
namespace structs {
struct S1 {[[nodiscard]]constexpr auto process() {return 1;}};
struct S2 {[[nodiscard]]constexpr auto process() {return 2;}};
struct S3 {[[nodiscard]]constexpr auto process() {return 3;}};
struct S4 {[[nodiscard]]constexpr auto process() {return 4;}};
S1 s1;
S2 s2;
S3 s3;
S4 s4;
}
static void Switch(benchmark::State& state) {
int r = 0;
int index = 0;
auto ran_arr = get_random_array<NUMBER_OBJECTS>();
auto pick_randomly = [&] () {
index = ran_arr[r++ % ran_arr.size()];
};
for (auto _ : state) {
int res;
REPEAT(
switch (index) {
case 0:
res = structs::s1.process();
break;
case 1:
res = structs::s2.process();
break;
case 2:
res = structs::s3.process();
break;
case 3:
res = structs::s4.process();
break;
default:
res = structs::s1.process();
break;
}
)
benchmark::DoNotOptimize(index);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(Switch);
namespace structs::switch_index {
using type = std::variant<structs::S1, structs::S2, structs::S3, structs::S4>;
auto switch_lambda = [](auto r) -> type {
switch (r) {
case 0:
return structs::s1;
case 1:
return structs::s2;
case 2:
return structs::s3;
case 3:
return structs::s4;
default:
return structs::s1;
};
};
}
static void SwitchIndex(benchmark::State& state) {
int r = 0;
structs::switch_index::type* package = nullptr;
auto tasks = get_random_objects<structs::switch_index::type, NUMBER_OBJECTS>(structs::switch_index::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(
switch (package->index()) {
case 0:
res = std::get<structs::S1>(*package).process();
break;
case 1:
res = std::get<structs::S2>(*package).process();
break;
case 2:
res = std::get<structs::S3>(*package).process();
break;
case 3:
res = std::get<structs::S4>(*package).process();
break;
default:
res = std::get<structs::S1>(*package).process();
break;
}
)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(SwitchIndex);
namespace structs::virtual_call {
struct Base {
virtual int process() = 0;
virtual ~Base() {};
};
struct A: public Base {int process(){return 1;}};
struct B: public Base {int process(){return 2;}};
struct C: public Base {int process(){return 3;}};
struct D: public Base {int process(){return 4;}};
auto switch_lambda = [] (auto r) -> Base* {
switch (r) {
case 0:
return new A;
case 1:
return new B;
case 2:
return new C;
case 3:
return new D;
default:
return new A;
}
};
}
static void Virtual(benchmark::State& state) {
int r = 0;
structs::virtual_call::Base* package = nullptr;
auto tasks = get_random_objects<structs::virtual_call::Base*, NUMBER_OBJECTS>(structs::virtual_call::switch_lambda);
auto pick_randomly = [&] () {
package = tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(res = package->process();)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
for (auto&& i : tasks)
delete i;
}
BENCHMARK(Virtual);
namespace structs::virtual_constexpr {
struct Base {
virtual constexpr int process() const = 0;
virtual ~Base() {};
};
struct A: public Base {constexpr int process() const final {return 1;}};
struct B: public Base {constexpr int process() const final {return 2;}};
struct C: public Base {constexpr int process() const final {return 3;}};
struct D: public Base {constexpr int process() const final {return 4;}};
auto switch_lambda = [] (auto r) -> Base* {
switch (r) {
case 0:
return new A;
case 1:
return new B;
case 2:
return new C;
case 3:
return new D;
default:
return new A;
}
};
}
static void VirtualConstexpr(benchmark::State& state) {
int r = 0;
structs::virtual_constexpr::Base* package = nullptr;
auto tasks = get_random_objects<structs::virtual_constexpr::Base*, NUMBER_OBJECTS>(
structs::virtual_constexpr::switch_lambda);
auto pick_randomly = [&] () {
package = tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(res = package->process();)
pick_randomly();
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
}
for (auto&& i : tasks)
delete i;
}
BENCHMARK(VirtualConstexpr);
namespace structs::virtual_final {
struct Base {
virtual int process()= 0;
virtual ~Base() {}
};
struct A: public Base {int process() final {return 1;}};
struct B: public Base {int process() final {return 2;}};
struct C: public Base {int process() final {return 3;}};
struct D: public Base {int process() final {return 4;}};
auto swithc_lambda = [] (auto r) -> Base* {
switch(r) {
case 0: return new A;
case 1: return new B;
case 2: return new C;
case 3: return new D;
default: return new A;
}
};
}
static void VirtualFinal(benchmark::State& state) {
structs::virtual_final::Base* package = nullptr;
int r = 0;
auto tasks = get_random_objects<structs::virtual_final::Base*, NUMBER_OBJECTS>(structs::virtual_final::swithc_lambda);
auto pick_randomly = [&] () {
package = tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(res = package->process();)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
for (auto&& i : tasks)
delete i;
}
BENCHMARK(VirtualFinal);
namespace structs::CRTP {
//From the "Pitfalls" section on https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern
struct TBaseInterface {
virtual ~TBaseInterface() = default;
virtual int process() = 0;
};
template<typename T>
struct TBase: public TBaseInterface {
int process() override {return static_cast<T*>(this)->process();}
};
struct TS1: public TBase<TS1> {int process() final {return 1;}};
struct TS2: public TBase<TS2> {int process() final {return 2;}};
struct TS3: public TBase<TS3> {int process() final {return 3;}};
struct TS4: public TBase<TS4> {int process() final {return 4;}};
auto switch_lambda = [] (auto r) -> TBaseInterface* {
switch(r) {
case 0: return new TS1;
case 1: return new TS2;
case 2: return new TS3;
case 3: return new TS4;
default: return new TS1;
}
};
}
static void CRTP(benchmark::State& state) {
int r = 0;
structs::CRTP::TBaseInterface* task = nullptr;
auto tasks = get_random_objects<structs::CRTP::TBaseInterface*, NUMBER_OBJECTS>(structs::CRTP::switch_lambda);
auto pick_randomly = [&] () {
task = tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(res = task->process();)
benchmark::DoNotOptimize(task);
benchmark::DoNotOptimize(res);
pick_randomly();
}
for (auto&& i : tasks)
delete i;
}
BENCHMARK(CRTP);
namespace structs::CRTPVisitor {
template<typename T>
struct TBase {
int process() {return static_cast<T const&>(*this).process();}
};
struct TS1: public TBase<TS1> {int process() {return 1;}};
struct TS2: public TBase<TS2> {int process() {return 2;}};
struct TS3: public TBase<TS3> {int process() {return 3;}};
struct TS4: public TBase<TS4> {int process() {return 4;}};
using type = std::variant<TS1, TS2, TS3, TS4>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return TS1();
case 1: return TS2();
case 2: return TS3();
case 3: return TS4();
default: return TS1();
}
};
auto ov = overload {
[] (TS1& t) { return t.process(); },
[] (TS2& t) { return t.process(); },
[] (TS3& t) { return t.process(); },
[] (TS4& t) { return t.process(); },
};
}
static void CRTPVisitor(benchmark::State& state) {
int r = 0;
structs::CRTPVisitor::type* task = nullptr;
auto tasks = get_random_objects<structs::CRTPVisitor::type, NUMBER_OBJECTS>(structs::CRTPVisitor::switch_lambda);
auto pick_randomly = [&] () {
task = &tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(res = std::visit(structs::CRTPVisitor::ov, *task);)
pick_randomly();
benchmark::DoNotOptimize(task);
benchmark::DoNotOptimize(res);
}
}
BENCHMARK(CRTPVisitor);
namespace structs::concepts {
struct Base {
virtual ~Base() = default;
virtual int process() = 0;
};
struct S1:Base {int process() {return 1;}};
struct S2:Base {int process() {return 2;}};
struct S3:Base {int process() {return 3;}};
struct S4:Base {int process() {return 4;}};
template<typename T>
concept GetLike = requires(T* t) {t->process();};
template<GetLike T>
int process(T* t) {return t->process();}
auto switch_lambda = [] (auto r) -> Base* {
switch(r) {
case 0: return new S1;
case 1: return new S2;
case 2: return new S3;
case 3: return new S4;
default: return new S1;
}
};
}
static void Concepts(benchmark::State& state) {
int r = 0;
structs::concepts::Base* task = nullptr;
auto tasks = get_random_objects<structs::concepts::Base*, NUMBER_OBJECTS>(structs::concepts::switch_lambda);
auto pick_randomly = [&] () {
task = tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(res = structs::concepts::process(task);)
//same result with: REPEAT(res = task->process();)
pick_randomly();
benchmark::DoNotOptimize(task);
benchmark::DoNotOptimize(res);
}
for (auto&& i : tasks)
delete i;
}
BENCHMARK(Concepts);
namespace structs::functional_pointer_list {
using type = std::function<int()>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return std::bind(&structs::S1::process, structs::s1);
case 1: return std::bind(&structs::S2::process, structs::s2);
case 2: return std::bind(&structs::S3::process, structs::s3);
case 3: return std::bind(&structs::S4::process, structs::s4);
default: return std::bind(&structs::S1::process, structs::s1);
}
};
}
static void FunctionPointerList(benchmark::State& state) {
int r = 0;
std::size_t index;
auto tasks = get_random_objects<structs::functional_pointer_list::type, NUMBER_OBJECTS>(
structs::functional_pointer_list::switch_lambda);
auto pick_randomly = [&] () {
index = r++ % tasks.size();
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT( res = tasks[index]();)
benchmark::DoNotOptimize(index);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(FunctionPointerList);
namespace structs::getif {
using type = std::variant<structs::S1, structs::S2, structs::S3, structs::S4>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return structs::s1;
case 1: return structs::s2;
case 2: return structs::s3;
case 3: return structs::s4;
default: return structs::s1;
}
};
}
static void GetIf(benchmark::State& state) {
int r = 0;
structs::getif::type* package = nullptr;
auto tasks = get_random_objects<structs::getif::type, NUMBER_OBJECTS>(structs::getif::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(
if (auto item = std::get_if<structs::S1>(package)) {
res = item->process();
} else if (auto item = std::get_if<structs::S2>(package)) {
res = item->process();
} else if (auto item = std::get_if<structs::S3>(package)) {
res = item->process();
} else if (auto item = std::get_if<structs::S4>(package)) {
res = item->process();
} else {
res = item->process();
}
)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(GetIf);
namespace structs::holds_alternative {
using type = std::variant<structs::S1, structs::S2, structs::S3, structs::S4>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return structs::s1;
case 1: return structs::s2;
case 2: return structs::s3;
case 3: return structs::s4;
default: return structs::s1;
}
};
}
static void HoldsAlternative(benchmark::State& state) {
int r = 0;
structs::holds_alternative::type* package = nullptr;
auto tasks = get_random_objects<structs::holds_alternative::type, NUMBER_OBJECTS>(
structs::holds_alternative::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
for (auto _ : state) {
int res;
REPEAT(
if (std::holds_alternative<structs::S1>(*package)) {
res = std::get<structs::S1>(*package).process();
} else if (std::holds_alternative<structs::S2>(*package)) {
res = std::get<structs::S2>(*package).process();
} else if (std::holds_alternative<structs::S3>(*package)) {
res = std::get<structs::S3>(*package).process();
} else if (std::holds_alternative<structs::S4>(*package)) {
res = std::get<structs::S4>(*package).process();
} else {
res = std::get<structs::S1>(*package).process();
}
)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(HoldsAlternative);
namespace structs::visitor_constexpr {
using type = std::variant<structs::S1, structs::S2, structs::S3, structs::S4>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return structs::s1;
case 1: return structs::s2;
case 2: return structs::s3;
case 3: return structs::s4;
default: return structs::s1;
}
};
}
static void VisitorConstexpr(benchmark::State& state) {
int r = 0;
structs::visitor_constexpr::type* package = nullptr;
auto tasks = get_random_objects<structs::visitor_constexpr::type, NUMBER_OBJECTS>(
structs::visitor_constexpr::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
auto func = [] (auto& ref) {
using type = std::decay_t<decltype(ref)>;
if constexpr (std::is_same<type, structs::S1>::value) {return ref.process();}
else if constexpr (std::is_same<type, structs::S2>::value) {return ref.process();}
else if constexpr (std::is_same<type, structs::S3>::value) {return ref.process();}
else if constexpr (std::is_same<type, structs::S4>::value) {return ref.process();}
else {return 0;}
};
for (auto _ : state) {
int res;
REPEAT(res = std::visit(func, *package);)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(VisitorConstexpr);
namespace structs::visitor_struct {
using type = std::variant<structs::S1, structs::S2, structs::S3, structs::S4>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return structs::s1;
case 1: return structs::s2;
case 2: return structs::s3;
case 3: return structs::s4;
default: return structs::s1;
}
};
struct VisitPackage {
auto operator()(structs::S1& r) { return r.process(); }
auto operator()(structs::S2& r) { return r.process(); }
auto operator()(structs::S3& r) { return r.process(); }
auto operator()(structs::S4& r) { return r.process(); }
};
}
static void VisitorStruct(benchmark::State& state) {
int r = 0;
structs::visitor_struct::type* package = nullptr;
auto tasks = get_random_objects<structs::visitor_struct::type, NUMBER_OBJECTS>(structs::visitor_struct::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
auto vs = structs::visitor_struct::VisitPackage();
for (auto _ : state) {
int res;
REPEAT(res = std::visit(vs, *package);)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(VisitorStruct);
namespace structs::visitor_struct_constexpr {
using type = std::variant<structs::S1, structs::S2, structs::S3, structs::S4>;
auto switch_lambda = [] (auto r) -> type {
switch(r) {
case 0: return structs::s1;
case 1: return structs::s2;
case 2: return structs::s3;
case 3: return structs::s4;
default: return structs::s1;
}
};
struct VisitPackage {
constexpr auto operator()(structs::S1& r) { return r.process(); }
constexpr auto operator()(structs::S2& r) { return r.process(); }
constexpr auto operator()(structs::S3& r) { return r.process(); }
constexpr auto operator()(structs::S4& r) { return r.process(); }
};
}
static void VisitorStructConstexpr(benchmark::State& state) {
int r = 0;
structs::visitor_struct_constexpr::type* package = nullptr;
auto tasks = get_random_objects<structs::visitor_struct_constexpr::type, NUMBER_OBJECTS>(
structs::visitor_struct_constexpr::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
auto vs = structs::visitor_struct_constexpr::VisitPackage();
for (auto _ : state) {
int res;
REPEAT(res = std::visit(vs, *package);)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(VisitorStructConstexpr);
static void VisitorOverload(benchmark::State& state) {
int r = 0;
structs::visitor_struct::type* package = nullptr;
auto tasks = get_random_objects<structs::visitor_struct::type, NUMBER_OBJECTS>(
structs::visitor_struct::switch_lambda);
auto pick_randomly = [&] () {
package = &tasks[r++ % tasks.size()];
};
pick_randomly();
auto ov = overload {
[] (structs::S1& r) { return r.process(); },
[] (structs::S2& r) { return r.process(); },
[] (structs::S3& r) { return r.process(); },
[] (structs::S4& r) { return r.process(); },
};
for (auto _ : state) {
int res;
REPEAT(res = std::visit(ov, *package);)
benchmark::DoNotOptimize(package);
benchmark::DoNotOptimize(res);
pick_randomly();
}
}
BENCHMARK(VisitorOverload);
BENCHMARK_MAIN();