4

Is there a javascript implemenation of the Jenkins Hash I could use - rather than implementing it on my own?

I know there is a python implemenation I could use to write my own js. But I am no Javascript expert and therefor I would prefer someone elses implementation.


Update: (I did try to convert http://www.burtleburtle.net/bob/c/lookup3.c) Update 2: This code gives you the same hashes as hashlittle2 in lookup3.c

var Jenkins = {
rot: function(x,k) {
    return (x<<k) | (x>>>(32-k));
},

mix: function(a,b,c) {
    a = (a - c) | 0;  a ^= Jenkins.rot(c, 4);  c = (c + b) | 0;
    b = (b - a) | 0;  b ^= Jenkins.rot(a, 6);  a = (a + c) | 0;
    c = (c - b) | 0;  c ^= Jenkins.rot(b, 8);  b = (b + a) | 0;
    a = (a - c) | 0;  a ^= Jenkins.rot(c,16);  c = (c + b) | 0;
    b = (b - a) | 0;  b ^= Jenkins.rot(a,19);  a = (a + c) | 0;
    c = (c - b) | 0;  c ^= Jenkins.rot(b, 4);  b = (b + a) | 0;
    return {a : a, b : b, c : c};
},

final: function(a,b,c) {
   c ^= b; c -= Jenkins.rot(b,14) | 0;
   a ^= c; a -= Jenkins.rot(c,11) | 0;
   b ^= a; b -= Jenkins.rot(a,25) | 0;
   c ^= b; c -= Jenkins.rot(b,16) | 0;
   a ^= c; a -= Jenkins.rot(c,4) | 0;
   b ^= a; b -= Jenkins.rot(a,14) | 0;
   c ^= b; c -= Jenkins.rot(b,24) | 0;
   return {a : a, b : b, c : c};
},  

hashlittle2: function(k, initval, initval2) {
    var length = k.length;
    a = b = c = 0xdeadbeef + length + initval;
    c += initval2;

    offset = 0;
    while (length > 12) {
        a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); a = a>>>0;
        b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16) + (k.charCodeAt(offset+7)<<24)); b = b>>>0;
        c += (k.charCodeAt(offset+8) + (k.charCodeAt(offset+9)<<8) + (k.charCodeAt(offset+10)<<16) + (k.charCodeAt(offset+11)<<24)); c = c>>>0;
        o = Jenkins.mix(a,b,c);
        a = o.a; b = o.b; c = o.c;
        length -= 12;
        offset += 12;
    }

    switch(length) {
        case 12: c += (k.charCodeAt(offset+8) + (k.charCodeAt(offset+9)<<8) + (k.charCodeAt(offset+10)<<16) + (k.charCodeAt(offset+11)<<24)); b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16) + (k.charCodeAt(offset+7)<<24)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 11: c += (k.charCodeAt(offset+8) + (k.charCodeAt(offset+9)<<8) + (k.charCodeAt(offset+10)<<16)); b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16) + (k.charCodeAt(offset+7)<<24)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 10: c += (k.charCodeAt(offset+8) + (k.charCodeAt(offset+9)<<8)); b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16) + (k.charCodeAt(offset+7)<<24)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 9: c += (k.charCodeAt(offset+8)); b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16) + (k.charCodeAt(offset+7)<<24)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 8: b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16) + (k.charCodeAt(offset+7)<<24)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 7: b += (k.charCodeAt(offset+4) + (k.charCodeAt(offset+5)<<8) + (k.charCodeAt(offset+6)<<16)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 6: b += ((k.charCodeAt(offset+5)<<8) + k.charCodeAt(offset+4)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 5: b += (k.charCodeAt(offset+4)); a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 4: a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16) + (k.charCodeAt(offset+3)<<24)); break;
        case 3: a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8) + (k.charCodeAt(offset+2)<<16)); break;
        case 2: a += (k.charCodeAt(offset+0) + (k.charCodeAt(offset+1)<<8)); break;
        case 1: a += (k.charCodeAt(offset+0)); break;
        case 0: return {b : b, c : c};
    }

    o = Jenkins.final(a,b,c);
    a = o.a; b = o.b; c = o.c;

    return {b : b>>>0, c : c>>>0};
}    

}

Valentin
  • 373
  • 2
  • 13
  • C'mon, it'll be fun. Besides, if you have an existing source of valid data for tests... :p –  Nov 20 '10 at 19:21
  • 1
    Bitwise OR instead of `+` is significantly more efficient when combining bytes into 32-bit words. Do all you can to prevent JS from converting back to a float. You can see the difference when benchmarking it. You can also remove `break` statements and simply add relevant parts instead of the whole thing, since `case 12`, `case 11` can be implemented as a bitmask. – bryc Apr 10 '18 at 00:57

3 Answers3

4

Here's a JavaScript port I did of lookup3.c for a personal project to implement a Bloom filter in JavaScript. I can't say for sure whether it produces identical results to the C code though.

One of the main things that does not translate directly into JavaScript is the pointer arithmetic performed on the pointer to the key input. Look for the word offset in the code below to see how I handled that.

If you want an output as if the integer is unsigned, you can use returnValue >>> 0.

var BloomFilter = {
    // Convert a JavaScript string into an array of 32-bit words.
    // This preserves the UTF-16 encoding, padding with the null character if necessary.
    stringToWords: function(s) {
        var b = [];
        if(s.length & 1) {
            s += "\u0000";
        }
        for (var i = 0; i < s.length; i += 2) {
            b.push((s.charCodeAt(i) << 16) | (s.charCodeAt(i + 1)));
        }
        return b;
    },

    // Hash an array of multiple 32-bit words to a single word.
    // Adapted from "lookup3.c, by Bob Jenkins, May 2006, Public Domain."
    // as retrieved 2010-07-03 from http://burtleburtle.net/bob/c/lookup3.c

    hashWord: function(k, initval) {
        // definition of bitwise rotate function
        function rot(x, k) {
            return (x << k) | (x >>> (32 - k));
        }

        // initialization
        var a, b, c, length = k.length, offset = 0;
        a = b = c = (0xdeadbeef + (length << 2) + initval) | 0;

        // handle most of the key
        while(length > 3) {
            a = (a + k[offset]) | 0;
            b = (b + k[offset + 1]) | 0;
            c = (c + k[offset + 2]) | 0;

            // mixing function
            a = (a - c) | 0;  a ^= rot(c, 4);  c = (c + b) | 0;
            b = (b - a) | 0;  b ^= rot(a, 6);  a = (a + c) | 0;
            c = (c - b) | 0;  c ^= rot(b, 8);  b = (b + a) | 0;
            a = (a - c) | 0;  a ^= rot(c,16);  c = (c + b) | 0;
            b = (b - a) | 0;  b ^= rot(a,19);  a = (a + c) | 0;
            c = (c - b) | 0;  c ^= rot(b, 4);  b = (b + a) | 0;

            length -= 3;
            offset += 3;
        }

        // handle the final words if left over; fall-through is intended
        switch(length) {
            case 3: c = (c + k[offset + 2]) | 0;
            case 2: b = (b + k[offset + 1]) | 0;
            case 1: a = (a + k[offset]) | 0;

            // final mixing
            c ^= b; c = (c - rot(b,14)) | 0;
            a ^= c; a = (a - rot(c,11)) | 0;
            b ^= a; b = (b - rot(a,25)) | 0;
            c ^= b; c = (c - rot(b,16)) | 0;
            a ^= c; a = (a - rot(c, 4)) | 0;
            b ^= a; b = (b - rot(a,14)) | 0;
            c ^= b; c = (c - rot(b,24)) | 0;

            case 0: break; // nothing left to do
        }

        // return the result
        return c;
    },

    // Hash a string by converting to UTF-16 before using the lookup3 algorithm.
    hashString: function(s) {
        return BloomFilter.hashWord(BloomFilter.stringToWords(s), 0);
    }
}
Community
  • 1
  • 1
PleaseStand
  • 31,641
  • 6
  • 68
  • 95
  • It definitely produces different results. `a,b,c` take input from adjacent bytes (3 per loop) instead of 4 bytes each (12 per loop), making the calculations different. Also the shift in `length << 2` is not necessary. – bryc Apr 10 '18 at 00:53
2

With just a few modifications, the C code on Wikipedia will work perfectly in JavaScript. Just get rid of the data types and use key.length instead of having to pass in the length.

casablanca
  • 69,683
  • 7
  • 133
  • 150
  • But that is a *different* (and much simpler) hash than the "My Hash" Jenkin's presents at: http://www.burtleburtle.net/bob/hash/doobs.html, which contains a mix phase among other differences. (The one-at-a-time-hash is just one of the discussion hashes in said link.) –  Nov 20 '10 at 19:32
  • @pst: Well, that's still just a bunch of shifts and other arithmetic operators, so most of it will work unchanged in JavaScript. – casablanca Nov 20 '10 at 19:35
  • @casablanca Just point it out :-) However, I believe on difference may be in how overflows are treated -- this must be manually *added* into the JavaScript logic. The "My Hash" assumes an unsigned 4-bytes, and I believe on a 2's complement. –  Nov 20 '10 at 19:36
  • @pst: Sure, thanks for that. :) I'll leave it to the OP to choose whichever suits him. – casablanca Nov 20 '10 at 19:37
  • @casablanca Actually, the overflow issue also plagues the "direct conversion" of the C code on the wiki. Must be careful to emulate. –  Nov 20 '10 at 19:40
  • Thanks for all the input. I guess I'll try to convert this code into javascript: http://www.burtleburtle.net/bob/c/lookup3.c – Valentin Nov 20 '10 at 20:35
1

Here is an optimized port I did of lookup3 (2006):

function lookup3(k, init = 0, init2 = 0) {
    var len = k.length, o = 0,
        a = 0xdeadbeef + len + init | 0,
        b = 0xdeadbeef + len + init | 0,
        c = 0xdeadbeef + len + init + init2 | 0;

    while (len > 12) {
        a += k[o]   | k[o+1] << 8 | k[o+2]  << 16 | k[o+3]  << 24;
        b += k[o+4] | k[o+5] << 8 | k[o+6]  << 16 | k[o+7]  << 24;
        c += k[o+8] | k[o+9] << 8 | k[o+10] << 16 | k[o+11] << 24;

        a -= c; a ^= c<<4  | c>>>28; c = c+b | 0;
        b -= a; b ^= a<<6  | a>>>26; a = a+c | 0;
        c -= b; c ^= b<<8  | b>>>24; b = b+a | 0;
        a -= c; a ^= c<<16 | c>>>16; c = c+b | 0;
        b -= a; b ^= a<<19 | a>>>13; a = a+c | 0;
        c -= b; c ^= b<<4  | b>>>28; b = b+a | 0;

        len -= 12, o += 12;
    }

    if(len > 0) { // final mix only if len > 0
        switch (len) { // incorporate trailing bytes before fmix
            case 12: c += k[o+11] << 24;
            case 11: c += k[o+10] << 16;
            case 10: c += k[o+9] << 8;
            case  9: c += k[o+8];
            case  8: b += k[o+7] << 24;
            case  7: b += k[o+6] << 16;
            case  6: b += k[o+5] << 8;
            case  5: b += k[o+4];
            case  4: a += k[o+3] << 24;
            case  3: a += k[o+2] << 16;
            case  2: a += k[o+1] << 8;
            case  1: a += k[o];
        }

        c ^= b; c -= b<<14 | b>>>18;
        a ^= c; a -= c<<11 | c>>>21;
        b ^= a; b -= a<<25 | a>>>7;
        c ^= b; c -= b<<16 | b>>>16;
        a ^= c; a -= c<<4  | c>>>28;
        b ^= a; b -= a<<14 | a>>>18;
        c ^= b; c -= b<<24 | b>>>8;
    }
    // use c as 32-bit hash; add b for 64-bit hash. a is not mixed well.
    return [b >>> 0, c >>> 0];
}

And here is lookup2 (1996) which is very similar (but slower):

function lookup2(k, init = 0) {
    var len = k.length, o = 0, 
        a = 0x9e3779b9 | 0,
        b = 0x9e3779b9 | 0,
        c = init | 0;

    while (len >= 12) {
        a += k[o]   | k[o+1] << 8 | k[o+2]  << 16 | k[o+3]  << 24;
        b += k[o+4] | k[o+5] << 8 | k[o+6]  << 16 | k[o+7]  << 24;
        c += k[o+8] | k[o+9] << 8 | k[o+10] << 16 | k[o+11] << 24;

        a -= b; a -= c; a ^= c >>> 13;
        b -= c; b -= a; b ^= a << 8;
        c -= a; c -= b; c ^= b >>> 13;
        a -= b; a -= c; a ^= c >>> 12;
        b -= c; b -= a; b ^= a << 16;
        c -= a; c -= b; c ^= b >>> 5;
        a -= b; a -= c; a ^= c >>> 3;
        b -= c; b -= a; b ^= a << 10;
        c -= a; c -= b; c ^= b >>> 15;

        len -= 12, o += 12;
    }

    c += k.length;
    switch(len) {
        case 11: c += k[o+10] << 24;
        case 10: c += k[o+9] << 16;
        case  9: c += k[o+8] << 8;
        // the first byte of c is reserved for the length
        case  8: b += k[o+7] << 24;
        case  7: b += k[o+6] << 16;
        case  6: b += k[o+5] << 8;
        case  5: b += k[o+4];
        case  4: a += k[o+3] << 24;
        case  3: a += k[o+2] << 16;
        case  2: a += k[o+1] << 8;
        case  1: a += k[o];
    }

    a -= b; a -= c; a ^= c >>> 13;
    b -= c; b -= a; b ^= a << 8;
    c -= a; c -= b; c ^= b >>> 13;
    a -= b; a -= c; a ^= c >>> 12;
    b -= c; b -= a; b ^= a << 16;
    c -= a; c -= b; c ^= b >>> 5;
    a -= b; a -= c; a ^= c >>> 3;
    b -= c; b -= a; b ^= a << 10;
    c -= a; c -= b; c ^= b >>> 15;
    // c is intended as 32-bit hash only
    return c >>> 0;
}
bryc
  • 12,710
  • 6
  • 41
  • 61