I need to accurately calculate where a and b
are both integers. If I simply use typical change of base formula with floating point math functions I wind up with errors due to rounding error.
-
Can you please provide expected and calculated results. – Yuri Ginsburg Jun 14 '22 at 06:40
-
I updated my answer with more optimized code ... – Spektre Jun 14 '22 at 09:07
1 Answer
You can use this identity:
b^logb(a) = a
So binary search for x = logb(a):
find the biggest integer x such that b^x
is still less than a,
and afterwards just increment the final result.
Here small C++ example for 32 bits:
//---------------------------------------------------------------------------
uint32_t u32_pow(uint32_t a,uint32_t b) // = a^b (mod 2^32; wraps on overflow)
{
    // Square-and-multiply, scanning the exponent bits MSB-first:
    // each step squares the accumulator (doubling the partial exponent)
    // and, when the current exponent bit is set, multiplies in the base.
    // Note: the original used the C++-only cast DWORD(b&0x80000000) and
    // the non-standard DWORD type; this is plain C11.
    int i,bits=32;
    uint32_t d=1;
    for (i=0;i<bits;i++)
    {
        d*=d;                       // shift partial exponent left by one bit
        if (b&0x80000000u) d*=a;    // current MSB of b set -> multiply by a
        b<<=1;
    }
    return d;                       // 0^0 returns 1 by convention
}
//---------------------------------------------------------------------------
uint32_t u32_log2(uint32_t a) // = bit length of a, i.e. floor(log2(a))+1 for a>0; returns 1 for a==0
{
    // NOTE: the original comment claimed ceil(log2(a)), which is wrong:
    // e.g. a=4 returns 3, while ceil(log2(4))=2. The callers below rely on
    // the bit-length semantics (they use 1<<(u32_log2(a)-1) as search limit).
    uint32_t x;
    // Shift a left until its top bit reaches position 31, counting x down
    // from 32; the x>1 guard makes the a==0 (no set bit) result 1.
    for (x=32;((a&0x80000000u)==0)&&(x>1);x--,a<<=1);
    return x;
}
//---------------------------------------------------------------------------
// bit length of a: floor(log2(a))+1 for a>0, 1 for a==0 (local copy of u32_log2)
static uint32_t u32_log_bitlen(uint32_t a)
{
    uint32_t x;
    for (x=32;((a&0x80000000u)==0)&&(x>1);x--,a<<=1);
    return x;
}

// b^x computed in 64 bits and saturated at 2^32, so the caller can compare
// it against any 32-bit a without the mod-2^32 wrap the original suffered
// (e.g. 3^(2^31) wraps to 1 mod 2^32, which made the search keep bogus bits).
static uint64_t u32_log_pow_sat(uint32_t b,uint32_t x)
{
    const uint64_t lim=0x100000000ull; // 2^32 > any 32-bit a
    uint64_t d=1;
    int i;
    for (i=31;i>=0;i--)
    {
        if (d<lim) d*=d;            // d < 2^32 so d*d fits in 64 bits
        if (d>lim) d=lim;           // saturate; true value only grows from here
        if ((x>>i)&1u)
        {
            d*=b;                   // d <= 2^32, b < 2^32 -> product fits
            if (d>lim) d=lim;
        }
    }
    return d;
}

uint32_t u32_log(uint32_t b,uint32_t a) // = ceil(logb(a))
{
    uint32_t x,m;
    // edge cases
    if (b< 2) return 0;             // log base 0/1 undefined; 0 as sentinel
    if (a< 2) return 0;             // logb(1)=0; logb(0) undefined
    if (a<=b) return 1;
    // max limit for b=2; all other bases lead to smaller exponents anyway
    m=1u<<(u32_log_bitlen(a)-1);
    // binary search the biggest x with b^x < a; the saturating pow keeps
    // the comparison correct even when b^x exceeds 32 bits
    for (x=0;m;m>>=1)
    {
        x|=m;
        if (u32_log_pow_sat(b,x)>=a) x^=m; // too big -> clear this bit
    }
    return x+1;                     // smallest x with b^x >= a
}
//---------------------------------------------------------------------------
Where DWORD
is any unsigned 32bit int type... for more info about pow,log,exp and bin search see:
Note that u32_log2
is not really needed (unless you want bigints) — you can use a constant bit width instead; also some CPUs like x86 have a single asm instruction returning the same much faster than a for loop...
Now the next step is exploit the fact that the u32_pow
bin search is the same as the u32_log
bin search so we can merge the two functions and get rid of one nested for
loop completely improving complexity considerably like this:
//---------------------------------------------------------------------------
DWORD u32_pow(DWORD a,DWORD b) // = a^b (mod 2^32; wraps on overflow)
{
    // Binary exponentiation, walking the exponent from its lowest bit:
    // p holds a^(2^i) at step i, r accumulates the product of the powers
    // selected by the set bits of b. Arithmetic wraps mod 2^32 exactly as
    // in the MSB-first formulation, so the result is identical.
    DWORD r=1,p=a;
    int i;
    for (i=0;i<32;i++)
    {
        if (b&1) r*=p;
        p*=p;
        b>>=1;
    }
    return r;
}
//---------------------------------------------------------------------------
DWORD u32_log2(DWORD a) // = number of significant bits in a (floor(log2(a))+1 for a>0; 1 for a==0)
{
    // Count right-shifts until a is exhausted instead of left-shifting
    // toward the MSB; yields the same value for every input, including
    // the a==0 case (result 1).
    DWORD n=1;
    for (a>>=1;a;a>>=1) n++;
    return n;
}
//---------------------------------------------------------------------------
// bit length of a: floor(log2(a))+1 for a>0, 1 for a==0 (local copy of u32_log2)
static uint32_t u32_lut_bitlen(uint32_t a)
{
    uint32_t x;
    for (x=32;((a&0x80000000u)==0)&&(x>1);x--,a<<=1);
    return x;
}

uint32_t u32_log(uint32_t b,uint32_t a) // = ceil(logb(a))
{
    const uint64_t lim=0x100000000ull; // 2^32: saturation cap, >= any 32-bit a
    uint64_t bb[32];                   // bb[i] = b^(2^i) saturated at lim -- LUT to speed up b^x
    uint64_t bx,bx0;
    uint32_t x,m,bit,bits;
    // edge cases
    if (b< 2) return 0;                // log base 0/1 undefined; 0 as sentinel
    if (a< 2) return 0;                // logb(1)=0; logb(0) undefined
    if (a<=b) return 1;
    // max limit for x where b=2; all other bases lead to smaller x
    bits=u32_lut_bitlen(a);
    // compute bb LUT; the 64-bit saturation fixes the mod-2^32 wrap of the
    // original (squaring doubles the bit count, so DWORD entries overflowed)
    bb[0]=b;
    for (bit=1;bit<bits;bit++)
    {
        bb[bit]=(bb[bit-1]>=lim)?lim:bb[bit-1]*bb[bit-1]; // both factors < 2^32 -> fits in 64 bits
        if (bb[bit]>lim) bb[bit]=lim;
    }
    // bin search x and b^x at the same time (saturated b^x stays comparable to a)
    for (bx=1,x=0,bit=bits-1,m=1u<<bit;m;m>>=1,bit--)
    {
        x|=m; bx0=bx;
        bx=((bx>=lim)||(bb[bit]>=lim))?lim:bx*bb[bit];
        if (bx>lim) bx=lim;
        if (bx>=a){ x^=m; bx=bx0; }    // b^x too big -> clear bit, restore b^x
    }
    return x+1;                        // smallest x with b^x >= a
}
//---------------------------------------------------------------------------
The only drawback is that we need LUT for squares of b
so: b,b^2,b^4,b^8...
up to bits
number of squares
Beware squaring will double the number of bits so you should also handle overflow if b
or a
are too big ...
[Edit2] more optimization
As a benchmark on normal ints revealed, the bin search version is the same speed as the naive version (on bigints the bin search is much, much faster) because of the many additional operations besides the multiplications:
uint32_t u32_log_naive(uint32_t b,uint32_t a) // = ceil(logb(a))
{
    uint32_t x;
    uint64_t bx;        // 64-bit accumulator: b^x cannot wrap before exceeding any 32-bit a
    // edge cases
    if (b< 2) return 0; // log base 0/1 undefined; 0 as sentinel
    if (a< 2) return 0; // logb(1)=0; logb(0) undefined
    if (a<=b) return 1;
    // Multiply up until b^x reaches a. The original kept b^x in a signed
    // int, so its overflow was undefined behavior and the wrapped value
    // could stop the loop at a wrong exponent for large a.
    for (x=2,bx=(uint64_t)b*b;bx<a;x++) bx*=b;
    return x;           // smallest x with b^x >= a
}
We can optimize more:
we can comment out computation of unused squares:
//for ( ;bit<_bits;bit++) bb[bit]=1;
with this the bin search becomes faster also on
`int`s, but not by much.
We can also use a faster
log2
instead of the naive one
putting all together (x86 CPUs):
// Merged bin-search version of ceil(logb(a)) with the bit length obtained via
// the x86 BSR instruction (Borland/BDS-style inline asm; compiler-specific,
// not portable). Caveat (stated in the surrounding text): the DWORD LUT and
// bx wrap mod 2^32, so results are only valid while b^x fits in 32 bits.
DWORD u32_log(DWORD b,DWORD a) // = ceil(logb(a))
{
const int _bits=32; // DWORD bitwidth
DWORD bb[_bits]; // squares of b LUT for speed up b^x
DWORD x,m,bx,bx0,bit,bits;
// edge cases
if (b< 2) return 0;
if (a< 2) return 0;
if (a<=b) return 1;
// max limit for x where b=2, all other bases lead to smaller x
// NOTE(review): BSR yields the 0-based index of the highest set bit, i.e.
// u32_log2(a)-1, one LESS than the C version used. The search range
// 0..2^bits-1 still covers every exponent needed, so the result is the
// same -- but the comment "bits=u32_log2(a)" is off by one.
// BSR is undefined for a==0; safe here because the guards ensure a>b>=2.
asm {
bsr eax,a; // bits=u32_log2(a);
mov bits,eax;
}
// compute bb LUT
// bb[i] = b^(2^i); entries beyond what squaring can represent wrap mod 2^32
bb[0]=b;
for (bit=1;bit< bits;bit++) bb[bit]=bb[bit-1]*bb[bit-1];
// for ( ;bit<_bits;bit++) bb[bit]=1;
// bin search x and b^x at the same time
// invariant: bx == b^x (mod 2^32); a rejected bit restores both x and bx
for (bx=1,x=0,bit=bits-1,m=1<<bit;m;m>>=1,bit--)
{
x|=m; bx0=bx; bx*=bb[bit];
if (bx>=a){ x^=m; bx=bx0; }
}
return x+1;
}
however the speed up is just slight for example naive 137 ms
bin search 133 ms
... note that faster log2
did almost no change but that is because how my compiler is handling inline asm
(not sure why BDS2006 and BCC32 is very slow on switching between asm and C++ but its true that is why in older C++ builders inline asm functions where not a good choice for speed optimizations unless a major speedup was expected) ...

- 49,595
- 11
- 110
- 380
-
OK, so "use a binary search" certainly sounds like something that would result in more efficiency. But when I compare this to a naïve solution of `for (let i = 2, n = b; n *= b; i++) { if (n >= a) return i; }` and it seems to me like you solution is doing way more operations, especially multiplications. I haven't tried implementing and benchmarking this in a language like C/C++, but at least in JavaScript this solution is seemingly much slower than the naïve approach. In what circumstances is this more efficient? Have you ever benchmarked this? – Chris_F Jun 16 '22 at 02:12
-
@Chris_F Your naive solution will need `N` multiplications mine `log2(N)+log2log2(N)` as I am multiplying by squares of `b` and you just by `b` ... yes for small `a` or big `b` your solution will have less multiplications but the bigger `a` and smaller `b` the faster bin search will become (way more faster) for example if you do `n=b*b*b*b*b*b*b*b*b*b*b*b*b*b*b` I do `b2=b*b; b4=b2*b2; b8=b4*b4; n=b8*b4*b2*b` instead and that is just `b^15` and I am already faster with ratio `14/6` multiplications now imagine `b^1000`. – Spektre Jun 16 '22 at 06:57
-
@Chris_F But you're right — the other operations on normal `int`s have the same complexity as multiplication, so the resulting performance is the same (yes, I benchmarked, and the times match almost perfectly in C++). However once you take `bigints` into account things get even better in favor of bin search, as the subsequent operations have much lower complexity than multiplication, while for native `int` they are the same ... – Spektre Jun 16 '22 at 06:59
-
@Chris_F I added [edit2] with some minor optimizations leading to slight better speed but just slight ... – Spektre Jun 16 '22 at 07:25
-
-
@phuclv my compiler does not recognize it (its old) also it is not equivalent instruction ...If I see it right it would require increment which will slow things down more ... and the edge case additional condition but that is not used in this case – Spektre Jun 16 '22 at 08:12
-
if you want better optimization then you should use modern tools and modern ISA extensions, otherwise that micro-optimization is pointless – phuclv Jun 16 '22 at 08:15