#include <random>
#include <vector>
// Benchmark driver: draws one million values from a default-constructed
// std::normal_distribution<> using a 64-bit Mersenne Twister seeded with 42,
// and stores every draw in a pre-sized vector.
int main() {
    const int sampleCount = 1000000;

    // Reserve up front so the loop never triggers a reallocation.
    std::vector<double> samples;
    samples.reserve(sampleCount);

    std::mt19937_64 engine(42);           // fixed seed keeps runs reproducible
    std::normal_distribution<> gaussian;  // default parameters

    // One draw per iteration, appended in generation order.
    for (int remaining = sampleCount; remaining > 0; --remaining) {
        const double draw = gaussian(engine);
        samples.push_back(draw);
    }
}
`g++ -std=c++17 -O3` (version 10.2.0) and `clang++ -std=c++17 -O3` (version 11.0.0) generate binaries that differ significantly in performance.
$ time ./random_clang
./random_clang 0.11s user 0.00s system 99% cpu 0.113 total
$ time ./random_gcc
./random_gcc 0.03s user 0.00s system 99% cpu 0.032 total
Here are the results from Compiler Explorer and `valgrind --tool=callgrind`.
./random_clang
--------------------------------------------------------------------------------
Ir
--------------------------------------------------------------------------------
278,231,181 PROGRAM TOTALS
--------------------------------------------------------------------------------
Ir file:function
--------------------------------------------------------------------------------
135,606,558 ???:double std::generate_canonical<double, 53ul, std::mersenne_twister_engine<unsigned long, 64ul, 312ul, 156ul, 31ul, 13043109905998158313ul, 29ul, 6148914691236517205ul, 17ul, 8202884508482404352ul, 37ul, 18444473444759240704ul, 43ul, 6364136223846793005ul> >(std::mersenne_twister_engine<unsigned long, 64ul, 312ul, 156ul, 31ul, 13043109905998158313ul, 29ul, 6148914691236517205ul, 17ul, 8202884508482404352ul, 37ul, 18444473444759240704ul, 43ul, 6364136223846793005ul>&) [/home/xxx/EffectiveCpp/test/random_clang]
53,449,536 /build/glibc-eX1tMB/glibc-2.31/math/../sysdeps/x86_64/fpu/e_logl.S:__ieee754_logl [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
32,096,514 ???:main [/home/xxx/EffectiveCpp/test/random_clang]
27,997,376 /build/glibc-eX1tMB/glibc-2.31/math/w_logl_compat.c:logl [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
22,905,902 /build/glibc-eX1tMB/glibc-2.31/math/../sysdeps/ieee754/dbl-64/e_log.c:__ieee754_log_fma [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
2,500,000 /build/glibc-eX1tMB/glibc-2.31/math/./w_log_template.c:log@@GLIBC_2.29 [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
1,000,000 ???:0x0000000004a322f0 [???]
./random_gcc
--------------------------------------------------------------------------------
Ir
--------------------------------------------------------------------------------
125,607,194 PROGRAM TOTALS
--------------------------------------------------------------------------------
Ir file:function
--------------------------------------------------------------------------------
75,746,682 ???:main [/home/xxx/EffectiveCpp/test/random_gcc]
22,905,902 /build/glibc-eX1tMB/glibc-2.31/math/../sysdeps/ieee754/dbl-64/e_log.c:__ieee754_log_fma [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
19,769,747 ???:std::mersenne_twister_engine<unsigned long, 64ul, 312ul, 156ul, 31ul, 13043109905998158313ul, 29ul, 6148914691236517205ul, 17ul, 8202884508482404352ul, 37ul, 18444473444759240704ul, 43ul, 6364136223846793005ul>::_M_gen_rand() [/home/xxx/EffectiveCpp/test/random_gcc]
2,500,000 /build/glibc-eX1tMB/glibc-2.31/math/./w_log_template.c:log@@GLIBC_2.29 [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
1,000,000 ???:0x00000000001090f0 [???]
1,000,000 ???:0x0000000004a322f0 [???]
916,425 /build/glibc-eX1tMB/glibc-2.31/elf/dl-lookup.c:_dl_lookup_symbol_x [/usr/lib/x86_64-linux-gnu/ld-2.31.so]
544,815 /build/glibc-eX1tMB/glibc-2.31/elf/dl-lookup.c:do_lookup_x [/usr/lib/x86_64-linux-gnu/ld-2.31.so]
Why does the `clang++` version spend so much time calling `std::generate_canonical`? I have seen people claim that `g++` inlines more aggressively, but changing the inlining options on `clang++` (`-mllvm -inline-threshold=10000`) does not really help in my case.
Is this a bug or am I missing some other important compiler options? I know there are other ways to generate normally distributed random variables faster but I do not think this kind of speed inconsistency on a commonly used standard library function is normal.
UPDATE: It seems that after I linked the `clang++` version against `libc++` with `-stdlib=libc++ -lc++abi`, the performance became on par with the original `g++` version.
$ time ./random_perf
./random_perf 0.03s user 0.00s system 98% cpu 0.027 total
./random_perf
--------------------------------------------------------------------------------
Ir
--------------------------------------------------------------------------------
147,608,621 PROGRAM TOTALS
--------------------------------------------------------------------------------
Ir file:function
--------------------------------------------------------------------------------
106,311,924 /usr/lib/llvm-10/bin/../include/c++/v1/random:double std::__1::normal_distribution<double>::operator()<std::__1::mersenne_twister_engine<unsigned long, 64ul, 312ul, 156ul, 31ul, 13043109905998158313ul, 29ul, 6148914691236517205ul, 17ul, 8202884508482404352ul, 37ul, 18444473444759240704ul, 43ul, 6364136223846793005ul> >(std::__1::mersenne_twister_engine<unsigned long, 64ul, 312ul, 156ul, 31ul, 13043109905998158313ul, 29ul, 6148914691236517205ul, 17ul, 8202884508482404352ul, 37ul, 18444473444759240704ul, 43ul, 6364136223846793005ul>&, std::__1::normal_distribution<double>::param_type const&) [/home/xxx/EffectiveCpp/bin/random_perf]
22,905,902 /build/glibc-eX1tMB/glibc-2.31/math/../sysdeps/ieee754/dbl-64/e_log.c:__ieee754_log_fma [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
6,000,007 /usr/lib/llvm-10/bin/../include/c++/v1/vector:main
3,003,122 /usr/lib/llvm-10/bin/../include/c++/v1/random:main
3,000,016 /home/xxx/EffectiveCpp/src/random_perf.cpp:main [/home/xxx/EffectiveCpp/bin/random_perf]
2,500,000 /build/glibc-eX1tMB/glibc-2.31/math/./w_log_template.c:log@@GLIBC_2.29 [/usr/lib/x86_64-linux-gnu/libm-2.31.so]
1,000,002 /usr/lib/llvm-10/bin/../include/c++/v1/memory:main
1,000,000 ???:0x000000000494b2f0 [???]
507,749 /build/glibc-eX1tMB/glibc-2.31/elf/dl-lookup.c:_dl_lookup_symbol_x [/usr/lib/x86_64-linux-gnu/ld-2.31.so]