28

What's the easiest way to convert a floating point number to its binary representation in Javascript? (e.g. 1.0 -> 0x3F800000).

I have tried to do it manually, and this works to some extent (with usual numbers), but it fails for very big or very small numbers (no range checking) and for special cases (NaN, infinity, etc.):

// Builds the 32-bit IEEE 754 single-precision bit pattern of `flt` by hand
// and returns it as a signed 32-bit integer.
// No special handling exists for NaN, zero, infinities or out-of-range
// magnitudes; the mantissa is truncated to 23 bits, not rounded.
function floatToNumber(flt)
{
    var signBit = 0;
    if (flt < 0) {
        signBit = 1;
    }

    var magnitude = Math.abs(flt);

    // Power of two at or just below the magnitude (base-2 logarithm, floored).
    var power = Math.floor(Math.log(magnitude) / Math.LN2);

    // Significand scaled into [1, 2) for the inputs this version supports.
    var significand = magnitude / Math.pow(2, power);

    // Masking with 0x7FFFFF drops the implicit leading 1 and keeps 23 bits.
    var fractionBits = (significand * Math.pow(2, 23)) & 0x7FFFFF;
    var exponentBits = (power + 127) << 23; // 127 is the exponent bias

    return (signBit << 31) | exponentBits | fractionBits;
}

Am I reinventing the wheel?

EDIT: I've improved my version, now it handles special cases.

// Packs the three IEEE 754 single-precision fields into one signed 32-bit
// integer: `sign` in bit 31, `exponent` (already biased) in bits 23-30 and
// `mantissa` in bits 0-22. The fields are not range-checked.
function assembleFloat(sign, exponent, mantissa)
{
    return (sign << 31) | (exponent << 23) | (mantissa);
}

// Returns the IEEE 754 single-precision bit pattern of `flt` as a signed
// 32-bit integer (e.g. 1.0 -> 0x3F800000, -0 -> 0x80000000 | 0).
//
// The conversion is delegated to Float32Array, which rounds the value to the
// nearest single-precision number. That covers the cases the hand-written
// arithmetic got wrong: tiny values become zero or subnormals instead of
// infinity, -0 keeps its sign bit, the mantissa is rounded rather than
// truncated, and no logarithm is involved in finding the exponent.
function floatToNumber(flt)
{
    if (isNaN(flt)) // Special case: NaN
        return assembleFloat(0, 0xFF, 0x1337); // Mantissa is nonzero for NaN

    // Storing into a Float32Array performs the double -> single rounding.
    var single = new Float32Array([flt]);

    // Both views share one buffer and the platform's byte order, so reading
    // the same four bytes back as an Int32 yields the raw bit pattern.
    return new Int32Array(single.buffer)[0];
}

I'm still not sure whether this works 100% correctly, but it seems to work well enough. (I'm still looking for existing implementations.)

GameZelda
  • 824
  • 1
  • 7
  • 13
  • I'm curious to know why you're converting to an IEEE single precision representation. Aren't Javascript numbers usually stored as double precision (64-bit) quantities? – Mark Dickinson Jun 23 '10 at 11:36
  • You're returning infinity for small values (`exponent < -126`); somehow I don't think that's what you want. (Also, -0.0 ends up with the wrong sign bit, but maybe that doesn't matter for your application.) – Mark Dickinson Jun 23 '10 at 21:01
  • 1
    Mark Dickinson: I'm converting to 32-bit IEEE because the application will generate some values that will be used in a memory editor (that works with the address + bytes format). "exponent < -126" is right according to the Wikipedia. Exponents < -127 can't be represented, and exponent = -127 (that is 0 after adding bias) is used for zero and subnormal numbers (also according to the Wikipedia). And about -0.0, you're right. Is there any way to check if it's -0.0? Comparisons to 0.0 return true. – GameZelda Jun 23 '10 at 23:31
  • About exponents: the input can be any valid IEEE 754 *double precision* value; with your current code, if your input is something tiny like 1e-60 you end up outputting the binary representation for infinity. It might be more appropriate to output the representation for 0.0 instead, which is what you'd naturally get by rounding the double-precision value to single-precision. For distinguishing 0.0 from -0.0, the only way I know of is to look at the result of `atan2(flt, -1.0)` (if JavaScript provided copysign, that would be a better way). – Mark Dickinson Jun 24 '10 at 11:27
  • Right, my code on the negative exponent part is wrong (2^-infinity = 0 and not -infinity). I'll look at the negative zero thing and update the code later. Thanks! – GameZelda Jun 24 '10 at 16:30
  • 3
    You can do it with typed arrays: http://stackoverflow.com/a/10564792/309483 – Janus Troelsen Jan 16 '14 at 17:09

2 Answers2

8

New technologies make this easy and probably also more forward-compatible. I love extending built-in prototypes; not everyone does, so feel free to adapt the following code to a classical procedural approach:

(function() {
    // Returns a new 8-byte ArrayBuffer holding the receiver as a Float64.
    // The buffer itself is returned (not an array wrapping it), so it can be
    // passed straight to fromArrayBuffer or to a typed-array constructor.
    function NumberToArrayBuffer() {
        // valueOf() unwraps the Number object that non-strict code gets as `this`
        return new Float64Array([this.valueOf()]).buffer;
    }
    // Reads the first 8 bytes of `buffer` as a Float64 and returns the number.
    // The buffer must be at least 8 bytes long; otherwise the Float64Array
    // constructor throws a RangeError.
    function NumberFromArrayBuffer(buffer) {
        return new Float64Array(buffer, 0, 1)[0];
    }
    // Defines a writable, configurable property that is hidden from for-in loops.
    function defineHidden(target, name, value) {
        Object.defineProperty(target, name, {
            value: value,
            writable: true,
            configurable: true,
            enumerable: false
        });
    }
    if(Number.prototype.toArrayBuffer)  {
        console.warn("Overriding existing Number.prototype.toArrayBuffer - this can mean framework conflict, new WEB API conflict or double inclusion.");
    }
    defineHidden(Number.prototype, "toArrayBuffer", NumberToArrayBuffer);
    // Decoding does not depend on a receiver, so it is exposed as a static
    // Number.fromArrayBuffer(buffer).
    defineHidden(Number, "fromArrayBuffer", NumberFromArrayBuffer);
    // Kept so that existing calls through a number instance continue to work.
    defineHidden(Number.prototype, "fromArrayBuffer", NumberFromArrayBuffer);
})();

Test:

(function() {
    var numberProto = Number.prototype;

    // Produces a fresh ArrayBuffer containing the receiver stored as one Float64.
    function NumberToArrayBuffer() {
        var primitive = this.valueOf();
        var singleCell = new Float64Array([primitive]);
        return singleCell.buffer;
    }

    // Interprets the start of `buffer` as one Float64 and returns that number.
    // The buffer has to provide at least 8 bytes for the view to be created.
    function NumberFromArrayBuffer(buffer) {
        var firstCell = new Float64Array(buffer, 0, 1);
        return firstCell[0];
    }

    // Marks an already assigned property as non-enumerable so that for-in
    // loops do not list it.
    function hideFromForIn(owner, key) {
        Object.defineProperty(owner, key, {enumerable: false});
    }

    if (numberProto.toArrayBuffer) {
        console.warn("Overriding existing Number.prototype.toArrayBuffer - this can mean framework conflict, new WEB API conflict or double inclusion.");
    }

    numberProto.toArrayBuffer = NumberToArrayBuffer;
    Number.fromArrayBuffer = NumberFromArrayBuffer;

    hideFromForIn(numberProto, "toArrayBuffer");
    hideFromForIn(Number, "fromArrayBuffer");
})();
// Sample values, including the special cases NaN, the infinities and both zeros.
var test_numbers = [0.00000001, 666666666666, NaN, Infinity, -Infinity,0,-0];

// Round-trip each sample through toArrayBuffer / fromArrayBuffer and print
// the value that comes back.
console.log("Conversion symethry test: ");
for (const sample of test_numbers) {
    const encoded = (sample).toArrayBuffer();
    console.log("         ", Number.fromArrayBuffer(encoded));
}

// Show the eight bytes that make up the encoded form of 666.
{
    const encoded666 = (666).toArrayBuffer();
    console.log("Individual bytes of a Number: ",new Uint8Array(encoded666,0,8));
}
<script src="https://getfirebug.com/firebug-lite-debug.js"></script>
Jonathan Hall
  • 75,165
  • 16
  • 143
  • 189
Tomáš Zato
  • 50,171
  • 52
  • 268
  • 778
  • 4
    So it's basically `new Uint8Array(new Float64Array([num]).buffer,0,8)`. But does the warning """[*The bit pattern that might be observed in an ArrayBuffer after a Number value has been stored into it is not necessarily the same as the internal representation of that Number value used by the ECMAScript implementation.*](https://www.ecma-international.org/ecma-262/7.0/#sec-ecmascript-language-types-number-type)""" only refer to *irrelevant* bit patterns? Or does it also allow bit pattern changes which end up affecting the **observable** value? – Pacerier Apr 17 '17 at 17:33
  • @Pacerier That's a really good question, I'm gonna have to do some research and maybe come with a better solution that is platform independent. – Tomáš Zato Apr 02 '19 at 12:02
  • @Pacerier Some engines pack data into the irrelevant bits, so I think that caveat is just there to account for those. If the bit pattern written to a `Float64Array` represents an observably different value I think that would be a bug. – Jesse Jun 08 '20 at 19:10
  • I could be wrong but my reading of the spec is that this is well-defined. Look at the NumericToRawBytes and RawBytesToNumeric algorithms in the ArrayBuffer section, they explicitly specify IEEE-754 format when reading and storing Float64. I believe the caveat about "internal representation ... used by the implementation" simply means that the CPU's internal floating point registers are not specified but it has to convert to IEEE754 when storing bits in ArrayBuffers. – jw013 Oct 20 '20 at 17:43
6

Here's a function that works on everything I've tested it on, except it doesn't distinguish -0.0 and +0.0.

It's based on code from http://jsfromhell.com/classes/binary-parser, but it's specialized for 32-bit floats and returns an integer instead of a string. I also modified it to make it faster and (slightly) more readable.

// Based on code from Jonas Raoni Soares Silva
// http://jsfromhell.com/classes/binary-parser
/**
 * Encodes a number as the bit pattern of the nearest IEEE 754
 * single-precision value, rounding ties to even.
 *
 * The value is expanded into an array of binary digits (`bin`), rounded to
 * 24 significant bits, and the exponent and mantissa fields are read back
 * from that array.
 *
 * Differences from earlier revisions of this function:
 *  - -0 is encoded with its sign bit set (0x80000000) instead of as +0;
 *  - magnitudes of 2^128 - 2^103 and above encode as infinity (they used to
 *    come out as 0 or as the NaN pattern);
 *  - rounding takes every fraction bit of the input into account.
 *
 * @param {*} number Value to encode; it is converted with unary plus first.
 * @returns {number} The 32-bit pattern as a signed 32-bit integer
 *     (patterns with the sign bit set are negative). NaN yields 0x7fc00000.
 */
function encodeFloat(number) {
    var n = +number,
        // Midpoint between the largest finite single (2^128 - 2^104) and
        // 2^128. Round-half-even sends it, and anything larger, to infinity.
        overflowLimit = Math.pow(2, 128) - Math.pow(2, 103),
        len = 281, // 2 * 127 + 1 + 23 + 3,
        bin = new Array(len),
        exp = 0,
        signal, intPart, floatPart, i, lastBit, rounded, j, mantissa;

    if (n !== n) {
        return 0x7fc00000; // NaN is the only value that differs from itself
    }

    // 1 / -0 is -Infinity, which is how -0 is told apart from +0.
    signal = n < 0 || (n === 0 && 1 / n < 0);
    n = Math.abs(n);

    if (n >= overflowLimit) {
        // Covers +-Infinity and finite values too large for a single.
        return ((signal ? 0x80000000 : 0) + 0x7f800000) | 0;
    }

    intPart = Math.floor(n);
    floatPart = n - intPart;

    i = len;
    while (i) {
        bin[--i] = 0;
    }

    // Integer digits: bin[128] is the 2^0 digit, bin[127] the 2^1 digit, ...
    i = 129;
    while (intPart && i) {
        bin[--i] = intPart % 2;
        intPart = Math.floor(intPart / 2);
    }

    // Fraction digits: bin[129] is the 2^-1 digit, bin[130] the 2^-2 digit, ...
    // The array grows past `len` when the input has digits that far down.
    i = 128;
    while (floatPart > 0) {
        floatPart *= 2;
        if (floatPart >= 1) {
            bin[++i] = 1;
            floatPart -= 1;
        } else {
            bin[++i] = 0;
        }
    }

    // Locate the most significant set digit.
    i = -1;
    while (++i < len && !bin[i]);

    // `i` becomes the index of the first stored mantissa digit. Below the
    // normal range the window is pinned at index 255 (the 2^-127 digit), so
    // the result is a subnormal or zero.
    exp = 128 - i;
    if (exp >= -126 && exp <= 127) {
        i = i + 1;
    } else {
        exp = -127;
        i = 255;
    }

    // Round to nearest, ties to even, at the last stored mantissa digit.
    lastBit = 22 + i;
    if (bin[lastBit + 1]) {
        rounded = bin[lastBit];
        if (!rounded) {
            // Exactly half only if nothing below the round digit is set;
            // scan the whole array, including the part beyond `len`.
            j = lastBit + 2;
            while (!rounded && j < bin.length) {
                rounded = bin[j++];
            }
        }

        // Add one at lastBit, propagating the carry towards index 0.
        j = lastBit + 1;
        while (rounded && --j >= 0) {
            bin[j] = 1 - bin[j];
            if (bin[j]) {
                rounded = 0;
            }
        }
    }

    // A carry may have moved the leading digit one place up, so find it
    // again, starting just above its previous position.
    i = i - 2 < 0 ? -1 : i - 3;
    while (++i < len && !bin[i]);
    exp = 128 - i;
    if (exp >= -126 && exp <= 127) {
        ++i;
    } else if (exp < -126) {
        i = 255;
        exp = -127;
    }

    // Collect the 23 stored mantissa digits.
    mantissa = 0;
    for (j = i + 23; i < j; i++) {
        mantissa = (mantissa << 1) + bin[i];
    }

    // exp + 127 is the biased exponent; it is 0 for zero and subnormals.
    return ((signal ? 0x80000000 : 0) + ((exp + 127) << 23) + mantissa) | 0;
}
Matthew Crumley
  • 101,441
  • 24
  • 103
  • 129