-1

Is there any way to do a left shift of bit's, and add a space?
Using the least number of operations in c or c++.

⚡ My resolution
I did not specify that my necessity was a static operation, and it was not onerous to have to operate dynamically.
In this case I opted for a compiler macro.

#define EXPAND_2(bitmask)   (   ((bitmask & 0x0001) << 1)  |    /* Bit 0 expand    */ \
                                ((bitmask & 0x0002) << 2)  |    /* Bit 1 expand    */ \
                                ((bitmask & 0x0004) << 3)  |    /* Bit 2 expand    */ \
                                ((bitmask & 0x0008) << 4)  |    /* Bit 3 expand    */ \
                                ((bitmask & 0x0010) << 5)  |    /* Bit 4 expand    */ \
                                ((bitmask & 0x0020) << 6)  |    /* Bit 5 expand    */ \
                                ((bitmask & 0x0040) << 7)  |    /* Bit 6 expand    */ \
                                ((bitmask & 0x0080) << 8)  |    /* Bit 7 expand    */ \
                                ((bitmask & 0x0100) << 9)  |    /* Bit 8 expand    */ \
                                ((bitmask & 0x0200) << 10) |    /* Bit 9 expand    */ \
                                ((bitmask & 0x0400) << 11) |    /* Bit 10 expand   */ \
                                ((bitmask & 0x0800) << 12) |    /* Bit 11 expand   */ \
                                ((bitmask & 0x1000) << 13) |    /* Bit 12 expand   */ \
                                ((bitmask & 0x2000) << 14) |    /* Bit 13 expand   */ \
                                ((bitmask & 0x4000) << 15) |    /* Bit 14 expand   */ \
                                ((bitmask & 0x8000) << 16)      /* Bit 15 expand   */ \
                            )

the solution is used to set the pinmode registers of an STM32 where every 2 bits describes the configuration of a pin of the mcu.

thanks anyway even if the post has been closed.

  • 1
    duplicates: [What's a fast way to space-out bits within a word?](https://stackoverflow.com/q/47692902/995714), [How to insert zeros between bits in a bitmap?](https://stackoverflow.com/q/4597940/995714) – phuclv Mar 10 '22 at 09:27
  • Yeah just followed the links and realized that in your case there are only 16 possible inputs, so you can have a look-up table with 15 entries. That will be pretty fast. – Dialecticus Mar 10 '22 at 09:34
  • To avoid potential operator precedence issues, you should parenthesize `bitmask` in the macro expansion. – chqrlie Mar 11 '22 at 07:09

1 Answers1

1

If the number you want to transform has a limited range such as 4 bits, using a lookup table is both simple and efficient:

#include <stdint.h>

// For a 4-bit to 8-bit version:
uint8_t expand4(uint8_t v) {
    const static uint8_t table[16] = {
        0, 2, 8, 2+8, 32, 2+32, 8+32, 2+8+32,
        128, 2+128, 8+128, 2+8+128, 32+128, 2+32+128, 8+32+128, 2+8+32+128,
    };
    return table[v & 15];
}

// For a 8-bit to 16-bit version (counting on inline expansion):
uint16_t expand8(uint8_t v) {
    return (expand4(v >> 4) << 8) | expand4(v);
}

// For a 16-bit to 32-bit version (direct implementation):
uint32_t expand16(uint16_t v) {
    const static uint32_t t[16] = {
        0, 2, 8, 2+8, 32, 2+32, 8+32, 2+8+32,
        128, 2+128, 8+128, 2+8+128, 32+128, 2+32+128, 8+32+128, 2+8+32+128,
    };
    return t[v & 15] | (t[(v >> 4) & 15] << 8) | 
           (t[(v >> 8) & 15] << 16) | (t[(v >> 12) & 15] << 24);
}

Using a larger lookup table would reduce the number of instructions but the initializer will be much larger and bulky.

A simple loop would probably be less efficient because of branch mis-prediction:

#include <stdint.h>

uint32_t expand16(uint16_t v) {
    uint32_t res = 0;
    for (int shift = 1; v; v >>= 1, shift += 2) {
        res |= (v & 1) << shift;
    }
    return res;
}

Here is another approach without shift or branches but many stalls:

#include <stdint.h>

uint32_t expand16(uint16_t v) {
    uint32_t res = v;
    res += res & 0xFFF8000;
    res += res & 0xFFFC000;
    res += res & 0xFFFE000;
    res += res & 0xFFFF000;
    res += res & 0xFFFF800;
    res += res & 0xFFFFC00;
    res += res & 0xFFFFE00;
    res += res & 0xFFFFF00;
    res += res & 0xFFFFF80;
    res += res & 0xFFFFFC0;
    res += res & 0xFFFFFE0;
    res += res & 0xFFFFFF0;
    res += res & 0xFFFFFF8;
    res += res & 0xFFFFFFC;
    res += res & 0xFFFFFFE;
    return res + res;
}

Combining shifts and additions will improve this approach into an efficient solution:

#include <stdint.h>

uint32_t expand16(uint16_t v) {
    uint32_t res = ((uint32_t)v & 0xFF00) << 8) | (v & 0xFF);
    res = ((res & 0xF000F0) <<  4) | (res & 0xF000F);
    res += res & 0x08080808;
    res += res & 0x14141414;
    res += res & 0x2A2A2A2A;
    return res + res;  /* remove the last addition for expand_16_31 */
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189