Follow-up question for IEEE 754 conformant sqrt() implementation for double type.
Context: Need to implement IEEE 754 conformant sqrtf()
taking into account the following HW restrictions and usage limitations:
Provides a special instruction
qseed.f
to get an approximation of the reciprocal of the square root (the accuracy of the result is no less than 6.75 bits, and therefore always within ±1% of the accurate result).Single precision FP:
a. Support by HW (SP FPU): has support;
b. Support by SW (library): has support;
c. Support of subnormal numbers: no support (
FLT_HAS_SUBNORM
is 0).Double precision FP:
a. Support by HW (DP FPU): no support;
b. Support by SW (library): has support;
c. Support of subnormal numbers: no support (
DBL_HAS_SUBNORM
is 0).
I've found one presentation by John Harrison and ended up with this implementation (note that here qseed.f
is replaced by rsqrtf()
):
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
// https://github.com/nickzman/hyperspace/blob/master/frsqrt.hh
#if 1
float rsqrtf ( float x )
{
const float xhalf = 0.5f * x;
int i = *(int*) & x;
i = 0x5f375a86 - ( i >> 1 );
x = *(float*) & i;
x = x * ( 1.5f - xhalf * x * x );
x = x * ( 1.5f - xhalf * x * x );
x = x * ( 1.5f - xhalf * x * x );
return x;
}
#else
float rsqrtf ( float x )
{
return 1.0f / sqrtf( x );
}
#endif
float sqrtfr_jh( float x, float r )
{
/*
* John Harrison, Formal Verification Methods 5: Floating Point Verification,
* Intel Corporation, 12 December 2002, document name: slides5.pdf, page 14,
* slide "The square root algorithm".
* URL: https://www.cl.cam.ac.uk/~jrh13/slides/anu-09_12dec02/slides5.pdf
*/
double rd, b, z0, s0, d, k, h0, e, t0, s1, c, d1, h1, s;
static const double half = 0.5;
static const double one = 1.0;
static const double three = 3.0;
static const double two = 2.0;
rd = (double)r;
b = half * x;
z0 = rd * rd;
s0 = x * rd;
d = fma( -b, z0, half );
k = fma( x, rd, -s0 );
h0 = half * rd;
e = fma( three / two, d, one );
t0 = fma( d, s0, k );
s1 = fma( e, t0, s0 );
c = fma( d, e, one );
d1 = fma( -s1, s1, x );
h1 = c * h0;
s = fma( d1, h1, s1 );
return (float)s;
}
float my_sqrtf( float x )
{
/* handle special cases */
if (x == 0) {
return x + x;
}
/* handle normal cases */
if ((x > 0) && (x < INFINITY)) {
return sqrtfr_jh( x, rsqrtf( x ) );
}
/* handle special cases */
return (x < 0) ? NAN : (x + x);
}
/*
https://groups.google.com/forum/#!original/comp.lang.c/qFv18ql_WlU/IK8KGZZFJx4J
From: geo <gmars...@gmail.com>
Newsgroups: sci.math,comp.lang.c,comp.lang.fortran
Subject: 64-bit KISS RNGs
Date: Sat, 28 Feb 2009 04:30:48 -0800 (PST)
This 64-bit KISS RNG has three components, each nearly
good enough to serve alone. The components are:
Multiply-With-Carry (MWC), period (2^121+2^63-1)
Xorshift (XSH), period 2^64-1
Congruential (CNG), period 2^64
*/
static uint64_t kiss64_x = 1234567890987654321ULL;
static uint64_t kiss64_c = 123456123456123456ULL;
static uint64_t kiss64_y = 362436362436362436ULL;
static uint64_t kiss64_z = 1066149217761810ULL;
static uint64_t kiss64_t;
#define MWC64 (kiss64_t = (kiss64_x << 58) + kiss64_c, \
kiss64_c = (kiss64_x >> 6), kiss64_x += kiss64_t, \
kiss64_c += (kiss64_x < kiss64_t), kiss64_x)
#define XSH64 (kiss64_y ^= (kiss64_y << 13), kiss64_y ^= (kiss64_y >> 17), \
kiss64_y ^= (kiss64_y << 43))
#define CNG64 (kiss64_z = 6906969069ULL * kiss64_z + 1234567ULL)
#define KISS64 (MWC64 + XSH64 + CNG64)
int main (void)
{
const uint64_t N = 10000000000ULL; /* desired number of test cases */
float arg, ref, res;
uint64_t argi64;
uint32_t refi, resi;
uint64_t count = 0;
float spec[] = {0.0f, 1.0f, INFINITY, NAN};
printf ("test a few special cases:\n");
for (int i = 0; i < sizeof (spec)/sizeof(spec[0]); i++) {
printf ("my_sqrt(%a) = %a\n", spec[i], my_sqrtf(spec[i]));
printf ("my_sqrt(%a) = %a\n", -spec[i], my_sqrtf(-spec[i]));
}
printf ("test %lu random cases:\n", N);
do {
argi64 = KISS64;
memcpy (&arg, &argi64, sizeof arg);
if ( fpclassify(arg) == FP_SUBNORMAL )
{
continue;
}
++count;
res = my_sqrtf (arg);
ref = sqrtf (arg);
memcpy (&resi, &res, sizeof resi);
memcpy (&refi, &ref, sizeof refi);
if ( ! ( isnan(res) && isnan(ref) ) )
if (resi != refi) {
printf ("\rerror @ arg=%a (%e)\n", arg, arg);
printf ("\rerror @ res=%a (%e)\n", res, res);
printf ("\rerror @ ref=%a (%e)\n", ref, ref);
return EXIT_FAILURE;
}
if ((count & 0xfffff) == 0) printf ("\r[%lu]", count);
} while (count < N);
printf ("\r[%lu]", count);
printf ("\ntests PASSED\n");
return EXIT_SUCCESS;
}
And it seems to work correctly (at least for some random cases): it reports:
[10000000000]
tests PASSED
Now the question: since the original John Harrison's sqrtf()
algorithm uses only single precision computations (i.e. type float
), it is possible to reduce the number of operations when using only (except conversions) double precision computations (i.e. type double
) and still be IEEE 754 conformant?
P.S. Since users @njuffa
and @chux - Reinstate Monica
are strong in FP, I invite them to participate. However, all the competent in FP users are welcome.