I came up with three solutions so far:
The extremely inefficient standard library pow
and log2
functions:
int_fast16_t powlog(uint_fast16_t n)
{
return static_cast<uint_fast16_t>(pow(2, floor(log2(n))));
}
Far more efficient counting subsequent powers of 2 until I reach a greater number than I had to reach:
uint_fast16_t multiply(uint_fast16_t n)
{
uint_fast16_t maxpow = 1;
while(2*maxpow <= n)
maxpow *= 2;
return maxpow;
}
The most efficient so far binsearching a precomputed table of powers of 2:
uint_fast16_t binsearch(uint_fast16_t n)
{
static array<uint_fast16_t, 20> pows {1,2,4,8,16,32,64,128,256,512,
1024,2048,4096,8192,16384,32768,65536,131072,262144,524288};
return *(upper_bound(pows.begin(), pows.end(), n)-1);
}
Can this be optimized even more? Any tricks that could be used here?
Full benchmark I used:
#include <iostream>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <array>
#include <algorithm>
using namespace std;
using namespace chrono;
uint_fast16_t powlog(uint_fast16_t n)
{
return static_cast<uint_fast16_t>(pow(2, floor(log2(n))));
}
uint_fast16_t multiply(uint_fast16_t n)
{
uint_fast16_t maxpow = 1;
while(2*maxpow <= n)
maxpow *= 2;
return maxpow;
}
uint_fast16_t binsearch(uint_fast16_t n)
{
static array<uint_fast16_t, 20> pows {1,2,4,8,16,32,64,128,256,512,
1024,2048,4096,8192,16384,32768,65536,131072,262144,524288};
return *(upper_bound(pows.begin(), pows.end(), n)-1);
}
high_resolution_clock::duration test(uint_fast16_t(powfunct)(uint_fast16_t))
{
auto tbegin = high_resolution_clock::now();
volatile uint_fast16_t sink;
for(uint_fast8_t i = 0; i < UINT8_MAX; ++i)
for(uint_fast16_t n = 1; n <= 999999; ++n)
sink = powfunct(n);
auto tend = high_resolution_clock::now();
return tend - tbegin;
}
int main()
{
cout << "Pow and log took " << duration_cast<milliseconds>(test(powlog)).count() << " milliseconds." << endl;
cout << "Multiplying by 2 took " << duration_cast<milliseconds>(test(multiply)).count() << " milliseconds." << endl;
cout << "Binsearching precomputed table of powers took " << duration_cast<milliseconds>(test(binsearch)).count() << " milliseconds." << endl;
}
Compiled with -O2
this gave the following results on my laptop:
Pow and log took 19294 milliseconds.
Multiplying by 2 took 2756 milliseconds.
Binsearching precomputed table of powers took 2278 milliseconds.