8

What is the most portable and "right" way to do conversion from extended precision float (80-bit value, also known as long double in some compilers) to double (64-bit) in MSVC win32/win64?

MSVC currently (as of 2010) assumes that long double is double synonym.

I could probably write fld/fstp assembler pair in inline asm, but inline asm is not available for win64 code in MSVC. Do I need to move this assembler code to separate .asm file? Is that really so there are no good solution?

phuclv
  • 37,963
  • 15
  • 156
  • 475

4 Answers4

5

Just did this in x86 code...

    .686P
    .XMM

_TEXT   SEGMENT

EXTRN   __fltused:DWORD

PUBLIC  _cvt80to64
PUBLIC  _cvt64to80

_cvt80to64 PROC

    mov eax, dword ptr [esp+4]
    fld TBYTE PTR [eax]

    ret 0
_cvt80to64 ENDP


_cvt64to80 PROC
    mov eax, DWORD PTR [esp+12]
    fld QWORD PTR [esp+4]
    fstp    TBYTE PTR [eax]
    ret 0
_cvt64to80 ENDP

ENDIF

_TEXT   ENDS
    END
4

If your compiler / platform doesn't have native support for 80 bit floating point values, you have to decode the value yourself.

Assuming that the 80 bit float is stored within a byte buffer, located at a specific offset, you can do it like this:

float64 C_IOHandler::readFloat80(IColl<uint8> buffer, uint32 *ref_offset)
{
    uint32 &offset = *ref_offset;

    //80 bit floating point value according to the IEEE-754 specification and the Standard Apple Numeric Environment specification:
    //1 bit sign, 15 bit exponent, 1 bit normalization indication, 63 bit mantissa

    float64 sign;
    if ((buffer[offset] & 0x80) == 0x00)
        sign = 1;
    else
        sign = -1;
    uint32 exponent = (((uint32)buffer[offset] & 0x7F) << 8) | (uint32)buffer[offset + 1];
    uint64 mantissa = readUInt64BE(buffer, offset + 2);

    //If the highest bit of the mantissa is set, then this is a normalized number.
    float64 normalizeCorrection;
    if ((mantissa & 0x8000000000000000) != 0x00)
        normalizeCorrection = 1;
    else
        normalizeCorrection = 0;
    mantissa &= 0x7FFFFFFFFFFFFFFF;

    offset += 10;

    //value = (-1) ^ s * (normalizeCorrection + m / 2 ^ 63) * 2 ^ (e - 16383)
    return (sign * (normalizeCorrection + (float64)mantissa / ((uint64)1 << 63)) * g_Math->toPower(2, (int32)exponent - 16383));
}

This is how I did it, and it compiles fine with g++ 4.5.0. It of course isn't a very fast solution, but at least a functional one. This code should also be portable to different platforms, though I didn't try.

emkey08
  • 41
  • 2
2

I've just written this one. It constructs an IEEE double number from IEEE extended precision number using bit operations. It takes the 10 byte extended precision number in little endian format:

typedef unsigned long long uint64;

double makeDoubleFromExtended(const unsigned char x[10])
{
    int exponent = (((x[9] << 8) | x[8]) & 0x7FFF);
    uint64 mantissa =
        ((uint64)x[7] << 56) | ((uint64)x[6] << 48) | ((uint64)x[5] << 40) | ((uint64)x[4] << 32) | 
        ((uint64)x[3] << 24) | ((uint64)x[2] << 16) | ((uint64)x[1] << 8) | (uint64)x[0];
    unsigned char d[8] = {0};
    double result;

    d[7] = x[9] & 0x80; /* Set sign. */

    if ((exponent == 0x7FFF) || (exponent == 0))
    {
        /* Infinite, NaN or denormal */
        if (exponent == 0x7FFF)
        {
            /* Infinite or NaN */
            d[7] |= 0x7F;
            d[6] = 0xF0;
        }
        else
        {
            /* Otherwise it's denormal. It cannot be represented as double. Translate as singed zero. */
            memcpy(&result, d, 8);
            return result;
        }
    }
    else
    {
        /* Normal number. */
        exponent = exponent - 0x3FFF + 0x03FF; /*< exponent for double precision. */

        if (exponent <= -52)  /*< Too small to represent. Translate as (signed) zero. */
        {
            memcpy(&result, d, 8);
            return result;
        }
        else if (exponent < 0)
        {
            /* Denormal, exponent bits are already zero here. */
        }
        else if (exponent >= 0x7FF) /*< Too large to represent. Translate as infinite. */
        {
            d[7] |= 0x7F;
            d[6] = 0xF0;
            memset(d, 0x00, 6);
            memcpy(&result, d, 8);
            return result;
        }
        else
        {
            /* Representable number */
            d[7] |= (exponent & 0x7F0) >> 4;
            d[6] |= (exponent & 0xF) << 4;
        }
    }
    /* Translate mantissa. */

    mantissa >>= 11;

    if (exponent < 0)
    {
        /* Denormal, further shifting is required here. */
        mantissa >>= (-exponent + 1);
    }

    d[0] = mantissa & 0xFF;
    d[1] = (mantissa >> 8) & 0xFF;
    d[2] = (mantissa >> 16) & 0xFF;
    d[3] = (mantissa >> 24) & 0xFF;
    d[4] = (mantissa >> 32) & 0xFF;
    d[5] = (mantissa >> 40) & 0xFF;
    d[6] |= (mantissa >> 48) & 0x0F;

    memcpy(&result, d, 8);

    printf("Result: 0x%016llx", *(uint64*)(&result) );

    return result;
}
Calmarius
  • 18,570
  • 18
  • 110
  • 157
  • I think that the treatment of the case `if (exponent <= 0)` means that numbers that could be represented as binary64 subnormals end up represented as `0.0`. – Pascal Cuoq Sep 17 '13 at 16:57
  • Fixed it. The exact `exponent == 0` case can indeed yield a denormal double. – Calmarius Sep 17 '13 at 20:08
  • It's still not quite right: the cases in which there is something to do are the cases when `exponent` is between -52 and 0 (at that point of the code), and what must be done is **roughly** to make the implicit bit explicit, shift the significand that was going to be used to the right by `-exponent`, and set `exponent` to zero. The OP ended up using `FSTP`, so it's not very important, but you'll need to do it for `FSTP` in your emulator :) – Pascal Cuoq Sep 18 '13 at 13:33
  • Scratch the part about making an implicit bit explicit: the leading 1 is already explicit in `mantissa`. – Pascal Cuoq Sep 18 '13 at 14:11
  • Just saw this while looking for something else, and... out of curiosity, why did you `typedef unsigned __int64 uint64;`, instead of using `uint64_t` from ``? Was it to remove a dependency that isn't really necessary because you're only using a single name, because your compiler didn't support it, or for some other reason? – Justin Time - Reinstate Monica Jun 20 '16 at 21:53
  • @JustinTime don't know... VC9 don't have stdint. Replaced it with long long. – Calmarius Jun 21 '16 at 05:39
  • @Calmarius I wasn't asking why you used `__int64` instead of `long long`, just why you made your own `uint64` instead of using `uint64_t`. According to the MSDN, `__int64` was added in VS 6.0 (so it was indeed present in VS9); apparently, they actually had `__int64` before they had `long long`. My question was just why you made your own typedef, instead of using the functionally identical one from the standard header; I wasn't aware that VS9 didn't have ``, and thought you did it because you didn't need any other types from the header. – Justin Time - Reinstate Monica Jun 22 '16 at 17:11
0

Played with the given answers and ended up with this.

#include <cmath>
#include <limits>
#include <cassert>

#ifndef _M_X64

__inline __declspec(naked) double _cvt80to64(void* ) {
  __asm {
  //  PUBLIC _cvt80to64 PROC

    mov eax, dword ptr [esp+4]
    fld TBYTE PTR [eax]

    ret 0
  //    _cvt80to64 ENDP
  }
}

#endif

#pragma pack(push)
#pragma pack(2)
typedef unsigned char tDouble80[10];
#pragma pack(pop)


typedef struct {
  unsigned __int64 mantissa:64;
  unsigned int exponent:15;
  unsigned int sign:1;
} tDouble80Struct;

inline double convertDouble80(const tDouble80& val)
{
  assert(10 == sizeof(tDouble80));

  const tDouble80Struct* valStruct = reinterpret_cast<const tDouble80Struct*>(&val);

  const unsigned int mask_exponent = (1 << 15) - 1;
  const unsigned __int64 mantissa_high_highestbit = unsigned __int64(1) << 63;
  const unsigned __int64 mask_mantissa = (unsigned __int64(1) << 63) - 1;

  if (mask_exponent == valStruct->exponent) {

    if(0 == valStruct->mantissa) {
      return (0 != valStruct->sign) ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity();
    }

    // highest mantissa bit set means quiet NaN
    return (0 != (mantissa_high_highestbit & valStruct->mantissa)) ? std::numeric_limits<double>::quiet_NaN() :  std::numeric_limits<double>::signaling_NaN();
  }   

  // 80 bit floating point value according to the IEEE-754 specification and 
  // the Standard Apple Numeric Environment specification:
  // 1 bit sign, 15 bit exponent, 1 bit normalization indication, 63 bit mantissa

  const double sign(valStruct->sign ? -1 : 1);


  //If the highest bit of the mantissa is set, then this is a normalized number.
  unsigned __int64 mantissa = valStruct->mantissa;
  double normalizeCorrection = (mantissa & mantissa_high_highestbit) != 0 ? 1 : 0;
  mantissa &= mask_mantissa;

  //value = (-1) ^ s * (normalizeCorrection + m / 2 ^ 63) * 2 ^ (e - 16383)
  return (sign * (normalizeCorrection + double(mantissa) / mantissa_high_highestbit) * pow(2.0, int(valStruct->exponent) - 16383));
}
Totonga
  • 4,236
  • 2
  • 25
  • 31