To propose an alternative solution: the best way to copy bits and avoid UB is through `memcpy`:
/// Decodes a big-endian integer of type INT_T from the bytes at `data`.
///
/// Exactly sizeof(INT_T) bytes are consumed, most significant byte first.
/// The value is assembled in the unsigned counterpart of INT_T, and its
/// object representation is then copied into an INT_T with memcpy instead
/// of being converted with a cast.
///
/// @tparam INT_T integer type to produce; must be a type accepted by
///         std::make_unsigned_t.
/// @param data pointer to the first of sizeof(INT_T) bytes to read.
/// @return the INT_T whose bytes equal those of the assembled unsigned value.
template<typename INT_T>
INT_T read_big_endian(uint8_t const *data) {
    using Unsigned = std::make_unsigned_t<INT_T>;
    Unsigned bits = 0;
    // Walk the input front to back; each new byte becomes the low 8 bits.
    for (uint8_t const *const end = data + sizeof(INT_T); data != end; ++data) {
        // The cast truncates back to Unsigned after integer promotion.
        bits = static_cast<Unsigned>((bits << 8) | *data);
    }
    INT_T value;
    memcpy(&value, &bits, sizeof(bits));
    return value;
}
With this you won't get UB from casting an unsigned to a signed type, and with optimizations enabled, this compiles to the exact same assembly as your examples.
#include <cstdint>
#include <cstring>
#include <type_traits>
/// Decodes a big-endian integer of type INT_T from the bytes at `data`.
///
/// Reads sizeof(INT_T) bytes, most significant first, accumulating them in
/// the unsigned counterpart of INT_T, then converts the result to INT_T with
/// a static_cast.
///
/// @tparam INT_T integer type to produce; must be a type accepted by
///         std::make_unsigned_t.
/// @param data pointer to the first of sizeof(INT_T) bytes to read.
/// @return the accumulated value converted to INT_T.
template<typename INT_T>
INT_T read_big_endian(uint8_t const *data) {
    using Unsigned = std::make_unsigned_t<INT_T>;
    Unsigned accumulated = 0;
    for (std::size_t offset = 0; offset != sizeof(INT_T); ++offset) {
        // Make room for the next byte, then place it in the low 8 bits; the
        // cast truncates back to Unsigned after integer promotion.
        accumulated = static_cast<Unsigned>((accumulated << 8) | data[offset]);
    }
    return static_cast<INT_T>(accumulated);
}
/// Decodes a big-endian integer of type INT_T from the bytes at `data`.
///
/// Reads sizeof(INT_T) bytes, most significant first, into the unsigned
/// counterpart of INT_T, then copies that value's object representation into
/// an INT_T with memcpy instead of converting it with a cast.
///
/// @tparam INT_T integer type to produce; must be a type accepted by
///         std::make_unsigned_t.
/// @param data pointer to the first of sizeof(INT_T) bytes to read.
/// @return the INT_T whose bytes equal those of the assembled unsigned value.
template<typename INT_T>
INT_T read_big_endian2(uint8_t const *data) {
    using Unsigned = std::make_unsigned_t<INT_T>;
    Unsigned bits = 0;
    uint8_t const *const last = data + sizeof(INT_T);
    while (data != last) {
        // Shift the bytes read so far up and put the next one in the low
        // 8 bits; the cast truncates back to Unsigned after promotion.
        bits = static_cast<Unsigned>((bits << 8) | *data);
        ++data;
    }
    INT_T decoded;
    std::memcpy(&decoded, &bits, sizeof(decoded));
    return decoded;
}
// Non-template wrappers whose only job is to force the templates to be
// instantiated so that code is emitted for them.

/// Instantiates read_big_endian for int32_t and returns its result for `data`.
auto read32_1(uint8_t const *data) {
    const auto decoded = read_big_endian<int32_t>(data);
    return decoded;
}
/// Instantiates read_big_endian2 for int32_t and returns its result for `data`.
auto read32_2(uint8_t const *data) {
    const auto decoded = read_big_endian2<int32_t>(data);
    return decoded;
}
/// Instantiates read_big_endian for int64_t and returns its result for `data`.
auto read64_1(uint8_t const *data) {
    const auto decoded = read_big_endian<int64_t>(data);
    return decoded;
}
/// Instantiates read_big_endian2 for int64_t and returns its result for `data`.
auto read64_2(uint8_t const *data) {
    const auto decoded = read_big_endian2<int64_t>(data);
    return decoded;
}
Compiling this with `clang++ /tmp/test.cpp -std=c++17 -c -O3` produces:
_Z8read32_1PKh: # read32_1
movl (%rdi), %eax
bswapl %eax
retq
_Z8read32_2PKh: # read32_2
movl (%rdi), %eax
bswapl %eax
retq
_Z8read64_1PKh: # read64_1
movzbl (%rdi), %eax
shlq $8, %rax
movzbl 1(%rdi), %ecx
orq %rax, %rcx
shlq $8, %rcx
movzbl 2(%rdi), %eax
orq %rcx, %rax
shlq $8, %rax
movzbl 3(%rdi), %ecx
orq %rax, %rcx
shlq $8, %rcx
movzbl 4(%rdi), %eax
orq %rcx, %rax
shlq $8, %rax
movzbl 5(%rdi), %ecx
orq %rax, %rcx
shlq $8, %rcx
movzbl 6(%rdi), %edx
orq %rcx, %rdx
shlq $8, %rdx
movzbl 7(%rdi), %eax
orq %rdx, %rax
retq
_Z8read64_2PKh: # read64_2
movzbl (%rdi), %eax
shlq $8, %rax
movzbl 1(%rdi), %ecx
orq %rax, %rcx
shlq $8, %rcx
movzbl 2(%rdi), %eax
orq %rcx, %rax
shlq $8, %rax
movzbl 3(%rdi), %ecx
orq %rax, %rcx
shlq $8, %rcx
movzbl 4(%rdi), %eax
orq %rcx, %rax
shlq $8, %rax
movzbl 5(%rdi), %ecx
orq %rax, %rcx
shlq $8, %rcx
movzbl 6(%rdi), %edx
orq %rcx, %rdx
shlq $8, %rdx
movzbl 7(%rdi), %eax
orq %rdx, %rax
retq
on x86_64-linux-gnu with clang++ v8.
Most of the time, `memcpy` with optimizations enabled will compile to the exact same assembly as what you intend, but with the added benefit of no UB.
Updating for correctness: The OP correctly notes that this would still be invalid since signed int representations do not need to be two's complement (at least until C++20) and this would be implementation-defined behavior.
AFAICT, up until C++20, there doesn't actually seem to be a neat C++ way of performing bit-level operations on ints without actually knowing the bit representation of a signed int, which is implementation-defined. That being said, as long as you know your compiler will represent a C++ integral type as two's complement, then both using `memcpy` or the `static_cast` in the OP's second example should work.
Part of the major reason C++20 is exclusively representing signed ints as two's complement is because most existing compilers already represent them as two's complement. Both GCC and LLVM (and thus Clang) already internally use two's complement.
This doesn't seem entirely portable (and it's understandable if this isn't the best answer), but I would imagine that you know what compiler you'll be building your code with, so you can technically wrap this or your second example with checks to confirm that you're using an appropriate compiler.