The code below does not compute a proven correctly rounded double
precision cube root (see Robin's answer/work for that case). However,
after extensive testing I wasn't able to find any incorrectly rounded results.
Therefore the results of function cbrt_ac()
might be accidentally correctly rounded.
Halley's method
Indeed Halley's method is very suitable to improve the accuracy of an approximate cube root, see also njuffa's answer.
Here we rewrite the Halley update formula slightly:
x := x - x * (x^3 - a) / (2*x^3 + a) = x + x * (a - x^3) / (2*x^3 + a)
If we define d = a - x^3
, then x^3 = a - d
and
x := x + x * d / (2*(a - d) + a)
:= x + x * ( d / (3*a - 2*d) )
This form of the denominator (3*a - 2*d)
is suitable for both efficient and accurate evaluation.
In order to compute d = a - x^3
accurately we first compute x^2
as an exact sum of 2 doubles (as suggested by Stephen Canon), using the fma instruction: x^2 = x2_h + x2_l
exactly,
see e.g. Karp and Markstein, High precision division and square root.
x2_h := x * x
x2_l := fma(x, x, -x_h)
Now we can compute d = a - x^3
with a relatively high accuracy:
d_h := fma(x, -x2_h, a)
d := fma(x, -x2_l, d_h)
Function cbrt_ac()
In the code below an initial approximation to y = a^(-2/3)
is computed in a "fast inverse square root" way. Three division
free (inexpensive) Newton iterations improve the accuracy. r = a^(1/3)
is approximated with r = a * y
and
the accuracy of this cube root is improved with a pseudo Newton step. This leads to an almost correctly
rounded division free cube root method with about 0.50002
ulp accuracy.
Finally the accuracy is further improved with a single Halley iteration.
After more than 10^10
test calls to cbrt_ac()
the maximum observed error was 4.9999999999404426e-01
ulp.
Here the double precision results of cbrt_ac()
were compared with gcc's quad precision ( __float128
) function cbrtq
.
This suggests that the double precision function cbrt_ac()
might be correctly rounded.
Please let me know if you find an incorrectly rounded result.
Double precision code
/* Accurate double precision cube root function cbrt_ac() and surrounding test code */
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include <quadmath.h>
/* gcc -O3 -Wall -m64 -std=c99 -march=skylake cbrt_tst.c -lm -lquadmath */
/* Assumptions:
- rounding mode: round nearest
- safe math: with gcc don't use -ffast-math, with icc enable -fp-model=precise
- ieee-754 floating point math, higher intermediate floating point precision should not be used in the calculations, i.e. FLT_EVAL_METHOD == 0, which is default with the gcc x86-64 compiler.
*/
uint64_t d2i (double x);
double i2d (uint64_t i);
double cbrt_ac(double z){ // Accurate cube root (double)
double a, y, r, r2_h, r2_l, y_a2y4, ayy, diff, diff3, denom;
uint64_t ai, ai23, aim23;
int issmall;
a = fabs(z);
issmall = (a < 0.015625); // Scale large, small and/or subnormal numbers to avoid underflow, overflow or subnormal numbers
a = issmall ? (a * 0x1.0p+210) : (a * 0.125);
ai = d2i(a);
if ((ai >= 0x7FF0000000000000ull) || (z == 0.0)){ // Inf, 0.0 and NaN
r = z + z;
}
else
{
ai23 = 2 * (ai/3); // Integer division. The compiler, with suitable optimization level, should generate a much more efficient multiplication by 0xAAAAAAAAAAAAAAAB
aim23 = 0x6A8EB53800000000ull - ai23; // This uses a similar idea as the "fast inverse square root" approximation, see https://en.wikipedia.org/wiki/Fast_inverse_square_root
y = i2d(aim23); // y is an approximation of a^(-2/3)
ayy = (a * y) * y; // First Newton iteration for f(y)=a^2-y^-3 to calculate a better approximation y=a^(-2/3)
y_a2y4 = fma(ayy, -ayy, y);
y = fma(y_a2y4, 0.33333333333333333333, y);
ayy = (a * y) * y; // Second Newton iteration
y_a2y4 = fma(ayy, -ayy, y);
y = fma(y_a2y4, 0.33523333333, y); // This is a small modification to the exact Newton parameter 1/3 which gives slightly better results
ayy = (a * y) * y; // Third Newton iteration
y_a2y4 = fma(ayy, -ayy, y);
y = fma(y_a2y4, 0.33333333333333333333, y);
r = y * a; // Now r = y * a is an approximation of a^(1/3), because y approximates a^(-2/3).
r2_h = r * r; // Compute one pseudo Newton step with g(r)=a-r^3, but instead of dividing by f'(r)=3r^2 we multiply with
// the approximation 0.3333...*y (division is usually a relatively expensive operation)
r2_l = fma(r, r, -r2_h); // For better accuracy we split r*r=r^2 as r^2=r2_h+r2_l exactly.
diff = fma(r2_h, -r, a); // Compute diff=a-r^3 accurately: diff=(a-r*r2_h)-r*r2_l with two fma instructions
diff = fma(r2_l, -r, diff);
diff3 = diff * 0.33333333333333333333;
r = fma(diff3, y, r); // Now r approximates a^(1/3) within about 0.50002 ulp
r2_h = r * r; // One final Halley iteration
r2_l = fma(r, r, -r2_h);
diff = fma(r2_h, -r, a);
diff = fma(r2_l, -r, diff);
denom = fma(a, 3.0, -2.0 * diff);
r = fma(diff/denom, r, r);
r = issmall ? (r * 0x1.0p-70) : (r * 2.0); // Undo scaling
r = copysign(r, z);
}
return r;
}
int main(){
uint64_t i, ai;
ai=0;
double x, r;
double ulp, err, maxerr;
__float128 rq;
maxerr=0.0;
ai=0x7FEFFFFFFFFFFFFFull;
x = i2d (ai); // To test negative numbers start with x = -i2d (ai);
for(i=0; (i<12000000000ull)&&(x!=0.0); i=i+1){
r = cbrt_ac(x);
rq = cbrtq((__float128)x); // Compare with quad precision result rounded to double precision
if ( (r-(double)rq) != 0.0f ){
printf("not equal %12lu %24.16e %24.16e %24.16e \n", i, x, r, (r-(double)rq)/(double)rq);
}
ulp = (nextafter(r, 1.0/0.0)-r); // Compare double precision with quad precision and estimate ulp error
err = fabs( (double)((__float128)r - rq) )/ulp;
if (err>maxerr){
maxerr=err;
printf("ulp err %12lu %24.16e %24.16e %24.16e \n", i, x, r, err);
}
x = x * 0.9998596; // Multiply with approx 10^(-610/n_test) 10^(-610/1e6)=0.9985964
x = nextafter(x, 0); // 1e7 -> 0.99985955 1e8 -> 0.999985954 1e9 -> 0.999998595
} // After more than 10^10 test calls to cbrt_ac() the
printf("i = %12lu x = %24.16e \n", i, x); // maximum observed ulp error is 4.9999999999404426e-01
return 0;
}
union dbl_uint64{
double d;
uint64_t i;
};
uint64_t d2i (double x){ // Bitwise copy (type-punning) from double to uint64_t (no conversion)
union dbl_uint64 tmp ; // With C++ use memcopy instead of this "union trick"
tmp.d = x;
return tmp.i;
}
double i2d (uint64_t i){ // Bitwise copy (type-punning) from uint64_t to double (no conversion)
union dbl_uint64 tmp ; // With C++ use memcopy instead of this "union trick"
tmp.i = i;
return tmp.d;
}
Single precision code
Numerical tests with all possible input values of cbrtf_ac(), see code below, indicate that the error of this single precision cube
root function is less than 0.5 ulp, hence the result is a correctly rounded cube root.
/* Accurate float cube root function cbrtf_ac() and surrounding test code */
#include <stdio.h>
#include <stdint.h>
#include <math.h>
/* gcc -O3 -Wall -m64 -std=c99 -march=skylake cbrtf_tst.c -lm */
/* Assumptions:
- rounding mode: round nearest
- safe math: with gcc don't use -ffast-math, with icc enable -fp-model=precise
- ieee-754 floating point math, higher intermediate floating point precision should not be used in the calculations, i.e. FLT_EVAL_METHOD == 0, which is default with the gcc x86-64 compiler.
*/
uint32_t f2i (float x);
float i2f (uint32_t i);
uint64_t d2i (double x);
double i2d (uint64_t i);
double cbrt_ac(double z);
float cbrtf_ac(float z){ // Accurate cubic root (float)
float a, y, r, r2_h, r2_l, y_a2y4, ayy, diff, diff3, denom;
uint32_t ai, ai23, aim23;
int issmall;
a = fabsf(z);
issmall = (a < 0.015625f); // Scale large, small and/or subnormal numbers to avoid underflow, overflow or subnormal numbers
a = issmall ? (a * 0x1.0p+81f) : (a * 0.125f);
ai = f2i(a);
if ((ai > 0x7f7fffffu) || (z == 0.0f)){ // Inf, 0.0 and NaN
r = z + z;
}
else
{
ai23 = 2 * (ai/3); // Integer division. The compiler, with suitable optimization level, should generate a much more efficient multiplication by 0xAAAAAAAB
aim23 = 1774904040u - ai23; // This uses a similar idea as the "fast inverse square root" approximation, see https://en.wikipedia.org/wiki/Fast_inverse_square_root
y = i2f(aim23); // y is an approximation of a^(-2/3)
ayy = (a * y) * y; // Newton iteration for f(y)=a^2-y^-3 to calculate a better approximation y=a^(-2/3)
y_a2y4 = fmaf(ayy, -ayy, y);
y = fmaf(y_a2y4, 0.33333333333333333f, y);
ayy = (a * y) * y; // Second Newton iteration
y_a2y4 = fmaf(ayy, -ayy, y);
y = fmaf(y_a2y4, 0.3351667f, y); // This is a small modification to the exact Newton parameter 1/3 which gives slightly better results
r = y * a; // Now r = y * a is an approximation of a^(1/3), because y approximates a^(-2/3).
r2_h = r * r; // Compute one pseudo Newton step on g(r)=a-r^3, but instead of dividing by f'(r)=3r^2 we multiply with
// the approximation 0.3333...*y (division is usually a relatively expensive operation)
r2_l = fmaf(r, r, -r2_h); // For better accuracy we split r*r=r^2 as r^2=r2_h+r2_l exactly.
diff = fmaf(r2_h, -r, a); // Compute diff=a-r^3 accurately: diff=(a-r*r2_h)-r*r2_l with two fmaf instructions
diff = fmaf(r2_l, -r, diff); //
diff3 = diff * 0.333333333333333333f;
r = fmaf(diff3, y, r); // Now r is a near 0.5 ulp approximation to cbrtf(a)
r2_h = r * r; // One final Halley iteration
r2_l = fmaf(r, r, -r2_h);
diff = fmaf(r2_h, -r, a);
diff = fmaf(r2_l, -r, diff);
denom = fmaf(a, 3.0f, -2.0f * diff);
r = fmaf(diff/denom, r, r);
r = issmall ? (r * 0x1.0p-27f) : (r * 2.0f); // Undo scaling
r = copysignf(r, z);
}
return r;
}
int main(){
uint32_t i, ai;
ai=0;
float x, r;
double ulp, err, maxerr, rd;
maxerr=0.0;
for(i=0; i<0x7f800002u; i=i+1){
ai=i;
x = i2f (ai); // To test negative numbers: x = -i2f (ai);
r = cbrtf_ac(x);
rd = cbrt_ac( (double)x); // Compare with double precision result rounded to single precision
// rd = nextafter(rd, 1e50); // Uncomment one of these lines for 2 alternative tests
// rd = nextafter(rd, -1e50); // to compensate for a cbrt which might be 1 ulp inaccurate
if ( (r-(float)rd) != 0.0f ){
printf("not equal %u %18.10e %18.10e %18.10e %18.10e \n", i, x, r, rd, r-(float)rd);
}
ulp = (double)(nextafterf(r, 1.0f/0.0f)-r); // Compare single precision with double precision and estimate ulp error
err = fabs((double)r - rd)/ulp;
if (err>maxerr){
maxerr=err;
printf("ulp err %11u %18.10e %18.10e %18.10e \n", i, x, r, err);
}
}
return 0;
}
union flt_uint32{
float f;
uint32_t i;
};
uint32_t f2i (float x){ // Bitwise copy (type-punning) from float to int (no conversion)
union flt_uint32 tmp ; // With C++ use memcopy instead of this "union trick"
tmp.f = x;
return tmp.i;
}
float i2f (uint32_t i){ // Bitwise copy (type-punning) from int to float (no conversion)
union flt_uint32 tmp ; // With C++ use memcopy instead of this "union trick"
tmp.i = i;
return tmp.f;
}
double cbrt_ac(double z){ // Accurate cube root (double)
double a, y, r, r2_h, r2_l, y_a2y4, ayy, diff, diff3, denom;
uint64_t ai, ai23, aim23;
int issmall;
a = fabs(z);
issmall = (a < 0.015625); // Scale large, small and/or subnormal numbers to avoid underflow, overflow or subnormal numbers
a = issmall ? (a * 0x1.0p+210) : (a * 0.125);
ai = d2i(a);
if ((ai >= 0x7FF0000000000000ull) || (z == 0.0)){ // Inf, 0.0 and NaN
r = z + z;
}
else
{
ai23 = 2 * (ai/3); // Integer division. The compiler, with suitable optimization level, should generate a much more efficient multiplication by 0xAAAAAAAAAAAAAAAB
aim23 = 0x6A8EB53800000000ull - ai23; // This uses a similar idea as the "fast inverse square root" approximation, see https://en.wikipedia.org/wiki/Fast_inverse_square_root
y = i2d(aim23); // y is an approximation of a^(-2/3)
ayy = (a * y) * y; // First Newton iteration for f(y)=a^2-y^-3 to calculate a better approximation y=a^(-2/3)
y_a2y4 = fma(ayy, -ayy, y);
y = fma(y_a2y4, 0.33333333333333333333, y);
ayy = (a * y) * y; // Second Newton iteration
y_a2y4 = fma(ayy, -ayy, y);
y = fma(y_a2y4, 0.33523333333, y); // This is a small modification to the exact Newton parameter 1/3 which gives slightly better results
ayy = (a * y) * y; // Third Newton iteration
y_a2y4 = fma(ayy, -ayy, y);
y = fma(y_a2y4, 0.33333333333333333333, y);
r = y * a; // Now r = y * a is an approximation of a^(1/3), because y approximates a^(-2/3).
r2_h = r * r; // Compute one pseudo Newton step with g(r)=a-r^3, but instead of dividing by f'(r)=3r^2 we multiply with
// the approximation 0.3333...*y (division is usually a relatively expensive operation)
r2_l = fma(r, r, -r2_h); // For better accuracy we split r*r=r^2 as r^2=r2_h+r2_l exactly.
diff = fma(r2_h, -r, a); // Compute diff=a-r^3 accurately: diff=(a-r*r2_h)-r*r2_l with two fma instructions
diff = fma(r2_l, -r, diff);
diff3 = diff * 0.33333333333333333333;
r = fma(diff3, y, r); // Now r approximates a^(1/3) within about 0.50002 ulp
r2_h = r * r; // One final Halley iteration
r2_l = fma(r, r, -r2_h);
diff = fma(r2_h, -r, a);
diff = fma(r2_l, -r, diff);
denom = fma(a, 3.0, -2.0 * diff);
r = fma(diff/denom, r, r);
r = issmall ? (r * 0x1.0p-70) : (r * 2.0); // Undo scaling
r = copysign(r, z);
}
return r;
}
union dbl_uint64{
double d;
uint64_t i;
};
uint64_t d2i (double x){ // Bitwise copy (type-punning) from double to uint64_t (no conversion)
union dbl_uint64 tmp ; // With C++ use memcopy instead of this "union trick"
tmp.d = x;
return tmp.i;
}
double i2d (uint64_t i){ // Bitwise copy (type-punning) from uint64_t to double (no conversion)
union dbl_uint64 tmp ; // With C++ use memcopy instead of this "union trick"
tmp.i = i;
return tmp.d;
}