Consider the code below:
#include <cstdint>
#include <bit>
#include <utility>
// Aggregate of 100 32-bit words (400 bytes of data); the target type of the casts below.
struct A { uint32_t a[100]; };
// Aggregate of 200 16-bit words (400 bytes of data); the source type of the casts below.
struct B { uint16_t b[200]; };
// Consumer of an A taken by const reference. Declaration only: no definition
// appears in this snippet.
void test(const A&);
// Hands a local B to test() as an A by converting it with std::bit_cast.
// bit_cast returns a new A by value; that temporary is what binds to
// test()'s const A& parameter.
void foo() {
// NOTE(review): tmp is never initialized before its bytes are read by
// bit_cast — confirm this is acceptable for the codegen experiment.
B tmp;
// std::bit_cast takes its source by const reference, so std::move does not
// change what happens here.
test(std::bit_cast<A>(std::move(tmp)));
}
// Hands a local B to test() as an A by reinterpreting the reference type.
// No new A object is created; test() receives a reference to tmp itself.
void bar() {
B tmp;
// NOTE(review): if test() reads through this A&, it accesses a B object via
// an unrelated type — presumably a strict-aliasing violation; confirm.
test(reinterpret_cast<A&>(tmp));
}
With Clang 15 at -O3, foo and bar compile to equivalent code, but with GCC 12.2 at -O3, foo performs a data copy (rep movsq):
foo():
sub rsp, 808
mov ecx, 50
lea rdi, [rsp+400]
mov rsi, rsp
rep movsq
lea rdi, [rsp+400]
call test(A const&)
add rsp, 808
ret
bar():
sub rsp, 408
mov rdi, rsp
call test(A const&)
add rsp, 408
ret
Which compiler option can make GCC optimize this the way Clang does? Thanks. P.S. -Ofast does not help here.
[Edit] Based on the answer provided by user17732522, I modified the code as follows:
#include <cstdint>
#include <bit>
// Aggregate of 100 32-bit words (400 bytes of data); the target type of the casts below.
struct A { uint32_t a[100]; };
// Aggregate of 200 16-bit words (400 bytes of data); the source type of the casts below.
struct B { uint16_t b[200]; };
// Consumer of an A taken by const reference. Declaration only: no definition
// appears in this snippet.
void test(const A&);
// Hands the by-value parameter arg to test() as an A via std::bit_cast.
// bit_cast returns a new A by value; that temporary is what binds to
// test()'s const A& parameter.
void foo(B arg) {
test(std::bit_cast<A>(arg));
}
// Hands the by-value parameter arg to test() as an A by reinterpreting the
// reference type. No new A object is created; test() receives a reference
// to arg itself.
void bar(B arg) {
// NOTE(review): if test() reads through this A&, it accesses a B object via
// an unrelated type — presumably a strict-aliasing violation; confirm.
test(reinterpret_cast<A&>(arg));
}
Now both GCC and Clang perform a data copy for foo. So it looks like std::bit_cast is not intended to cover this kind of case.