Operations like this can be performed by incrementally moving many bits at a time using bit operations.
Example for a 32-bit integer:
uint32_t reverse(uint32_t x, size_t len)
{
assert(len > 0 && len <= 4);
x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);
x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
return x >> (32 - len * 8);
}
This should probably be the fastest and cache friendly implementation.
To implement similar for a custom width integer you need to be able to construct bit mask that has "X bit over X bit set" e.g. 1 bit after 1 (every second bit), 2 after 2 etc. The rest should be self-explanatory.
You can implement other variant based on this. If you need to choose them at runtime, it might be better to just use the widest version (64-bit) all the time to avoid branching.
Note: GCC is able to recognize byte-swap operation in the last two lines before the return statement and is able to generate bswap
on x86 and rev
on ARM. With 64-bit CPU this make 64-bit version equivalent to the version above in speed.
Apparently you cannot post an algorithm based on common techniques without referencing some paper/book/patent this days. So I am posting a highly patented, copyrighted and in every other way protected generalized version here. Note that you should think twice before using it (because of how patented it is).
Unfortunately, assembly generated out of this code is a complete crap. Only clang is able to somehow understand what is going on and optimize it away.
The original implementation I wrote few years ago was in C++ and heavily relied on templates to help the compiler. In the end it was a horrible and complicated mess, but could generate many bit-oriented algorithms for 8, 16, 32 and 64 integers (and even more if compiler supported them). Nevertheless, this can serve as a description of the algorithm above.
This code implements reverse algorithm for 8, 16, 32, 64 bit words using single instance of a 64 bit code. Helper functions repeat
, select
can be used as a basic for many bit algorithms (althougth they basically generate required bit masks).
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
/**
* @brief Mask with `n` bits set.
*/
static uint64_t bits(size_t n)
{
return (((1ull << n) - 1) | (-uint64_t(n >= 64)));
}
static uint64_t do_repeat(uint64_t x, size_t w, size_t n)
{
if (n == 0)
return x;
const size_t shift = w * (n - 1);
return (x << shift) | do_repeat(x, w, n - 1);
}
/**
* @brief Repeat pattern over 64-bit word.
*
* @code
* assert(repeat( 0x1, 32) == 0x5555555555555555);
* assert(repeat( 0x3, 16) == 0x3333333333333333);
* assert(repeat( 0xF, 8) == 0x0F0F0F0F0F0F0F0F);
* assert(repeat( 0xFF, 4) == 0x00FF00FF00FF00FF);
* assert(repeat( 0xFFFF, 2) == 0x0000FFFF0000FFFF);
* assert(repeat(0xFFFFFFFF, 1) == 0x00000000FFFFFFFF);
* assert(repeat( 0x1, 16) == 0x1111111111111111);
* assert(repeat( 0x12, 8) == 0x1212121212121212);
* assert(repeat( 0x1234, 4) == 0x1234123412341234);
* assert(repeat(0x12345678, 2) == 0x1234567812345678);
* @endcode
*/
static uint64_t repeat(uint64_t x, size_t n)
{
assert(n != 0);
return do_repeat(x, 64 / n, n);
}
/**
* @brief Selects `1 << n` bits over `1 << n` bits.
*
* @code
* assert(select(0) == 0x5555555555555555);
* assert(select(1) == 0x3333333333333333);
* assert(select(2) == 0x0F0F0F0F0F0F0F0F);
* assert(select(3) == 0x00FF00FF00FF00FF);
* assert(select(4) == 0x0000FFFF0000FFFF);
* assert(select(5) == 0x00000000FFFFFFFF);
* @endcode
*/
static uint64_t select(size_t n)
{
assert(n < 6);
return repeat(bits(1 << n), 1 << (5 - n));
}
static uint64_t do_reverse(uint64_t x, size_t n)
{
const size_t shift = (1ull << n);
const uint64_t lo = select(n) << 0;
const uint64_t hi = select(n) << shift;
x = ((x & lo) << shift) | ((x & hi) >> shift);
if (n == 0)
return x;
return do_reverse(x, n - 1);
}
uint64_t reverse64(uint64_t x)
{
return do_reverse(x, 5);
}
uint32_t reverse32(uint32_t x)
{
return do_reverse(x, 4);
}
uint16_t reverse16(uint32_t x)
{
return do_reverse(x, 3);
}
uint8_t reverse8(uint8_t x)
{
return do_reverse(x, 2);
}
int main()
{
assert(repeat( 0x1, 32) == 0x5555555555555555);
assert(repeat( 0x3, 16) == 0x3333333333333333);
assert(repeat( 0xF, 8) == 0x0F0F0F0F0F0F0F0F);
assert(repeat( 0xFF, 4) == 0x00FF00FF00FF00FF);
assert(repeat( 0xFFFF, 2) == 0x0000FFFF0000FFFF);
assert(repeat(0xFFFFFFFF, 1) == 0x00000000FFFFFFFF);
assert(repeat( 0x1, 16) == 0x1111111111111111);
assert(repeat( 0x12, 8) == 0x1212121212121212);
assert(repeat( 0x1234, 4) == 0x1234123412341234);
assert(repeat(0x12345678, 2) == 0x1234567812345678);
assert(select(0) == 0x5555555555555555);
assert(select(1) == 0x3333333333333333);
assert(select(2) == 0x0F0F0F0F0F0F0F0F);
assert(select(3) == 0x00FF00FF00FF00FF);
assert(select(4) == 0x0000FFFF0000FFFF);
assert(select(5) == 0x00000000FFFFFFFF);
assert(reverse8 ( 0xA5) == 0xA5);
assert(reverse16( 0xFEA5) == 0xA57F);
assert(reverse32( 0xFE0000A5) == 0xA500007F);
assert(reverse64(0xFE00FE0000A500A5) == 0xA500A500007F007F);
return 0;
}