
It looks like this code takes about 50 clocks per bit swap on my i7 XPS 8500 machine. 7.6 seconds for a million array flips. Single threaded. It prints some ASCI art based on patterns of 1s and 0s. I rotated the pic left 180 degrees after reversing the bit array, using a graphic editor, and they look identical to me. A double-reversed image comes out the same as the original.
As for pluses, it's a complete solution. It swaps bits from the back of a bit array to the front, vs operating on ints/bytes and then needing to swap ints/bytes in an array.
Also, this is a general purpose bit library, so you might find it handy in the future for solving other, more mundane problems.
Is it as fast as the accepted answer? I think it's close, but without working code to benchmark it's impossible to say. Feel free to cut and paste this working program.
// Reverse BitsInBuff.cpp : Defines the entry point for the console application.
#include "stdafx.h"
#include "time.h"
#include "memory.h"
//
// Manifest constants
#define uchar unsigned char
#define BUFF_BYTES 510 //400 supports a display of 80x40 bits
#define DW 80 // Display Width
// ----------------------------------------------------------------------------
uchar mask_set[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
uchar mask_clr[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f };
//
// Function Prototypes
static void PrintIntBits(long x, int bits);
void BitSet(uchar * BitArray, unsigned long BitNumber);
void BitClr(uchar * BitArray, unsigned long BitNumber);
void BitTog(uchar * BitArray, unsigned long BitNumber);
uchar BitGet(uchar * BitArray, unsigned long BitNumber);
void BitPut(uchar * BitArray, unsigned long BitNumber, uchar value);
//
uchar *ReverseBitsInArray(uchar *Buff, int BitKnt);
static void PrintIntBits(long x, int bits);
// -----------------------------------------------------------------------------
// Reverse the bit ordering in an array
uchar *ReverseBitsInArray(uchar *Buff, int BitKnt) {
unsigned long front=0, back = BitKnt-1;
uchar temp;
while( front<back ) {
temp = BitGet(Buff, front); // copy front bit to temp before overwriting
BitPut(Buff, front, BitGet(Buff, back)); // copy back bit to front bit
BitPut(Buff, back, temp); // copy saved value of front in temp to back of bit arra)
front++;
back--;
}
return Buff;
}
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
int _tmain(int argc, _TCHAR* argv[]) {
int i, j, k, LoopKnt = 1000001;
time_t start;
uchar Buff[BUFF_BYTES];
memset(Buff, 0, sizeof(Buff));
// make an ASCII art picture
for(i=0, k=0; i<(sizeof(Buff)*8)/DW; i++) {
for(j=0; j<DW/2; j++) {
BitSet(Buff, (i*DW)+j+k);
}
k++;
}
// print ASCII art picture
for(i=0; i<sizeof(Buff); i++) {
if(!(i % 10)) printf("\n"); // print bits in blocks of 80
PrintIntBits(Buff[i], 8);
}
i=LoopKnt;
start = clock();
while( i-- ) {
ReverseBitsInArray((uchar *)Buff, BUFF_BYTES * 8);
}
// print ASCII art pic flipped upside-down and rotated left
printf("\nMilliseconds elapsed = %d", clock() - start);
for(i=0; i<sizeof(Buff); i++) {
if(!(i % 10)) printf("\n"); // print bits in blocks of 80
PrintIntBits(Buff[i], 8);
}
printf("\n\nBenchmark time for %d loops\n", LoopKnt);
getchar();
return 0;
}
// -----------------------------------------------------------------------------
// Scaffolding...
static void PrintIntBits(long x, int bits) {
unsigned long long z=1;
int i=0;
z = z << (bits-1);
for (; z > 0; z >>= 1) {
printf("%s", ((x & z) == z) ? "#" : ".");
}
}
// These routines do bit manipulations on a bit array of unsigned chars
// ---------------------------------------------------------------------------
void BitSet(uchar *buff, unsigned long BitNumber) {
buff[BitNumber >> 3] |= mask_set[BitNumber & 7];
}
// ----------------------------------------------------------------------------
void BitClr(uchar *buff, unsigned long BitNumber) {
buff[BitNumber >> 3] &= mask_clr[BitNumber & 7];
}
// ----------------------------------------------------------------------------
void BitTog(uchar *buff, unsigned long BitNumber) {
buff[BitNumber >> 3] ^= mask_set[BitNumber & 7];
}
// ----------------------------------------------------------------------------
uchar BitGet(uchar *buff, unsigned long BitNumber) {
return (uchar) ((buff[BitNumber >> 3] >> (BitNumber & 7)) & 1);
}
// ----------------------------------------------------------------------------
void BitPut(uchar *buff, unsigned long BitNumber, uchar value) {
if(value) { // if the bit at buff[BitNumber] is true.
BitSet(buff, BitNumber);
} else {
BitClr(buff, BitNumber);
}
}
Below is the code listing for an optimization using a new buffer, instead of swapping bytes in place. Given that only 2030:4080 BitSet()s are needed because of the if() test, and about half the GetBit()s and PutBits() are eliminated by eliminating TEMP, I suspect memory access time is a large, fixed cost to these kinds of operations, providing a hard limit to optimization.
Using a look-up approach, and CONDITIONALLY swapping bytes, rather than bits, reduces by a factor of 8 the number of memory accesses, and testing for a 0 byte gets amortized across 8 bits, rather than 1.
Using these two approaches together, testing to see if the entire 8-bit char is 0 before doing ANYTHING, including the table lookup, and the write, is likely going to be the fastest possible approach, but would require an extra 512 bytes for the new, destination bit array, and 256 bytes for the lookup table. The performance payoff might be quite dramatic though.
// -----------------------------------------------------------------------------
// Reverse the bit ordering in new array
uchar *ReverseBitsInNewArray(uchar *Dst, const uchar *Src, const int BitKnt) {
int front=0, back = BitKnt-1;
memset(Dst, 0, BitKnt/BitsInByte);
while( front < back ) {
if(BitGet(Src, back--)) { // memset() has already set all bits in Dst to 0,
BitSet(Dst, front); // so only reset if Src bit is 1
}
front++;
}
return Dst;