I wrote this implementation of sqrt that is finite in complexity and precise up to the last digit when double is ieee754 double. The question is that is this portable on devices of various endian (assuming 0LL is still 64 bit)? get_fraction returns the 52bits plus the 1 bit at the begining. Small doubles are treated separately and ensured that they also have 1 in the 53rd bit. The c++ part numeric_limits
nan can easily be replaced with a constant.
Code:
static inline constexpr int16_t get_exponent(double x)
{
uint64_t bits = *(uint64_t*)&x;
int16_t val = ((bits & 0x7FF0000000000000ULL) >> 52) - 1023;
if(val != -1023)
return val;
uint64_t temp_fractal= (bits & 0x000FFFFFFFFFFFFFULL);
for (int i = 51; i >= 0;--i) {
if(!(temp_fractal & (0x01ULL<<i))) --val;
else break;
}
return val;
}
static inline constexpr uint64_t get_fraction(double x)
{
uint64_t bits = *(uint64_t*)&x;
if (bits & 0x7FF0000000000000ULL)
return (bits & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
uint64_t temp_fraction = bits & 0x000FFFFFFFFFFFFFULL;
for (int i = 51; i >= 0; --i) {
temp_fraction<<=1;
if(0x0010000000000000ULL & temp_fraction) break;
}
return temp_fraction;
}
static inline constexpr bool is_reserved(double x)
{
return get_exponent(x) == 1024;
}
static inline constexpr double my_abs(double x)
{
uint64_t bits = *(uint64_t*)&x;
bits &= 0x7FFFFFFFFFFFFFFFULL;
return *(double*)&bits;
}
constexpr double make_double(bool sign, int16_t exponent, uint64_t fractal)
{
uint64_t data = (fractal & 0x000FFFFFFFFFFFFFULL);
assert((fractal & 0xFFF0000000000000ULL) == 0x0010000000000000ULL);
if (exponent < -1023) {
fractal >>= (-1022 - exponent);
data = fractal;
exponent = -1023;
}
else if (exponent > 1023) {
return (1-2*sign)*std::numeric_limits<double>::infinity();
}
{
data |= ((uint64_t)((uint16_t)(exponent + 1023))) << 52;
if (sign)
data |= 0x8000000000000000ULL;
return *(double*)&data;
}
}
constexpr double my_sqrt(double x)
{
if(!x || is_reserved(x))
return x;
if(x < 0)
return -std::numeric_limits<double>::quiet_NaN();
uint64_t fraction = get_fraction(x);
int16_t exponent = get_exponent(x);
//C standard says it rounds to zero
int16_t half_exponent = ((exponent-1024)/2)+512;
uint64_t test_fraction = 0x0010000000000000ULL;
double test = make_double(0, half_exponent, test_fraction);
if (test * test > x) half_exponent -= 1;
//just to be safe
test = make_double(0, half_exponent, test_fraction);
if (test * test > x) half_exponent -= 1;
//find each bit except last one, binary search for result
for (int i = 51; i > 0; --i) {
test = make_double(0, half_exponent, test_fraction | (0x01ULL<<i));
if(test*test<x) test_fraction |= (0x01ULL << i);
}
double del1 = my_abs(x - test*test);
double temp = make_double(0, half_exponent, test_fraction | 0x01ULL);
double del2 = my_abs(x - temp * temp);
//see if the whole fraction needs to round up by one
if (x > temp * temp) {
test_fraction += 2;
//rounding up by one made the fraction too large
if (test_fraction >= 0x0020000000000000ULL) {
test_fraction >>= 1;
half_exponent -= 1;
}
double temp2 = make_double(0, half_exponent, test_fraction);
double del3 = my_abs(x - temp2 * temp2);
if(del3 <del2) return temp2;
else return temp;
}
else if(del2<del1) return temp;
else return make_double(0, half_exponent, test_fraction);
}
Edit: add some comments Edit2: add missing functions