It's simple - solve the simplest case, then do more complex ones.
Case 1: Duplicate 1 bit into an 4 bit value (the simplest).
+---+---------+
| 0 | _ _ _ A |
+---+---------+
| 1 | A A A A |
+---+---------+
This can be done as a simple set of shifts:
x = (x << 0) | (x << 1) | (x << 2) | (x << 3);
Or in a less obvious but faster way:
x = (x << 4) - x;
This step will be the last step in all following cases.
Case 2: Duplicate 2 bits into an 8 bit value.
+---+---------+---------+
| 0 | _ _ _ _ | _ _ A B |
+---+---------+---------+
| 1 | _ _ _ A | _ _ _ B |
+---+---------+---------+
| 2 | A A A A | B B B B |
+---+---------+---------+
Case 3: Duplicate 4 bits into a 16 bit value. How? Just move 2 bits to the upper part to turn it into the case 1! Divide and conquer!
+---+---------+---------+---------+---------+
| 0 | _ _ _ _ | _ _ _ _ | _ _ _ _ | A B C D |
+---+---------+---------+---------+---------+
| 1 | _ _ _ _ | _ _ A B | _ _ _ _ | _ _ C D |
+---+---------+---------+---------+---------+
| 2 | _ _ _ A | _ _ _ B | _ _ _ C | _ _ _ D |
+---+---------+---------+---------+---------+
| 3 | A A A A | B B B B | C C C C | D D D D |
+---+---------+---------+---------+---------+
Case 4: Duplicate 8 bits into a 32 bit value (the original).
+---+---------+---------+---------+---------+---------+---------+---------+---------+
| 0 | _ _ _ _ | _ _ _ _ | _ _ _ _ | _ _ _ _ | _ _ _ _ | _ _ _ _ | A B C D | E F G H |
+---+---------+---------+---------+---------+---------+---------+---------+---------+
| 1 | _ _ _ _ | _ _ _ _ | _ _ _ _ | A B C D | _ _ _ _ | _ _ _ _ | _ _ _ _ | E F G H |
+---+---------+---------+---------+---------+---------+---------+---------+---------+
| 2 | _ _ _ _ | _ _ A B | _ _ _ _ | _ _ C D | _ _ _ _ | _ _ E F | _ _ _ _ | _ _ G H |
+---+---------+---------+---------+---------+---------+---------+---------+---------+
| 3 | _ _ _ A | _ _ _ B | _ _ _ C | _ _ _ D | _ _ _ E | _ _ _ F | _ _ _ G | _ _ _ H |
+---+---------+---------+---------+---------+---------+---------+---------+---------+
| 4 | A A A A | B B B B | C C C C | D D D D | E E E E | F F F F | G G G G | H H H H |
+---+---------+---------+---------+---------+---------+---------+---------+---------+
Can be achieved by the code below:
uint32_t interleave(uint8_t value)
{
uint32_t x = value;
x = (x | (x << 12)) /* & 0x000F000F */; // GCC is not able to remove redundant & here
x = (x | (x << 6)) & 0x03030303;
x = (x | (x << 3)) & 0x11111111;
x = (x << 4) - x;
return x;
}
Some test cases to check that it works:
TEST_F(test, interleave)
{
EXPECT_EQ(interleave(0x00), 0x00000000);
EXPECT_EQ(interleave(0x11), 0x000F000F);
EXPECT_EQ(interleave(0x22), 0x00F000F0);
EXPECT_EQ(interleave(0x33), 0x00FF00FF);
EXPECT_EQ(interleave(0x44), 0x0F000F00);
EXPECT_EQ(interleave(0x55), 0x0F0F0F0F);
EXPECT_EQ(interleave(0x66), 0x0FF00FF0);
EXPECT_EQ(interleave(0x77), 0x0FFF0FFF);
EXPECT_EQ(interleave(0x88), 0xF000F000);
EXPECT_EQ(interleave(0x99), 0xF00FF00F);
EXPECT_EQ(interleave(0xAA), 0xF0F0F0F0);
EXPECT_EQ(interleave(0xBB), 0xF0FFF0FF);
EXPECT_EQ(interleave(0xCC), 0xFF00FF00);
EXPECT_EQ(interleave(0xDD), 0xFF0FFF0F);
EXPECT_EQ(interleave(0xEE), 0xFFF0FFF0);
EXPECT_EQ(interleave(0xFF), 0xFFFFFFFF);
EXPECT_EQ(interleave(0x01), 0x0000000F);
EXPECT_EQ(interleave(0x23), 0x00F000FF);
EXPECT_EQ(interleave(0x45), 0x0F000F0F);
EXPECT_EQ(interleave(0x67), 0x0FF00FFF);
EXPECT_EQ(interleave(0x89), 0xF000F00F);
EXPECT_EQ(interleave(0xAB), 0xF0F0F0FF);
EXPECT_EQ(interleave(0xCD), 0xFF00FF0F);
EXPECT_EQ(interleave(0xEF), 0xFFF0FFFF);
}