I'm trying to create 8 hand coded fast hash algorithms with as much parallelism (i.e. CPU pipelining) as is reasonable for fixed length keys of size 1..8 bytes. I am creating 8 hash functions in order that the key size is a compile time constant. I am looking at common hash algorithms, and trying to unroll them. At the same time I thought I would just ask around to see if anyone had a link or knew a way to do it.
Our set of characters being hashed is normally just 0..9 A..Z SPC with the occasional binary short or long. I am aware the 1 char hash is likely useless, so I will make it so our code won't compile with that small a hash key. Perfection is not as important as speed. This means, the occasional bucket with 2 times the items is ok if we eliminate enough CPU cost generating the hash. I have extensively profiled our code for years now, so I know how much money it will save our company if I take the next step to make the hashing algorithm better/faster.
I can already do better than the hash function our apps currently use, UlHashOld below (which will likely be changed as well), but I think with some help from SO I can make it a little better. For instance, my hash function for 2 chars cuts the max bucket count in half compared to UlHashOld, but it still has a number of unfilled buckets. If would like to get the two char hash to better spread out its bits when doing alpha numeric keys. I've tested this with the following code:
template <int cb> class CFixLenKeyBase {
protected:
char m_ach[cb];
public:
CFixLenKeyBase(LPCSTR sz) { memcpy(m_ach, sz, cb); }
UL UlHashOld() {
UL n = 0;
for (LPCSTR sz = m_ach, szEnd = sz + cb; sz < szEnd; sz++)
n = n ^ (n >> 25) ^ (n << 7) ^ *sz;
return n;
}
};
template <int cb> class CFixLenKey : public CFixLenKeyBase<cb> {
public:
CFixLenKey(LPCSTR sz) : CFixLenKeyBase(sz) { }
};
template <> class CFixLenKey<1> : public CFixLenKeyBase<1> {
public:
CFixLenKey(LPCSTR sz) : CFixLenKeyBase(sz) { }
UL UlHash(void) const { return *m_ach; }
};
template <> class CFixLenKey<2> : public CFixLenKeyBase<2> {
public:
CFixLenKey(LPCSTR sz) : CFixLenKeyBase(sz) { }
UL UlHash(void) const {
auto a = *(WORD*)m_ach;
return b = (a >> 6) ^ a;
}
};
template <> class CFixLenKey<3> : public CFixLenKeyBase<3> {
public:
CFixLenKey(LPCSTR sz) : CFixLenKeyBase(sz) { }
UL UlHash(void) const {
UL a = (*(UL*)m_ach)&0xFFFFFF;
UL b = (a >> 14) ^ (a >> 6);
UL c = (a >> 11) ^ (a << 5);
UL d = (a >> 19) ^ (a << 9);
return d ^ c ^ b;
}
};
char a[36*36*36*36][4] = {0};
int main(int argc, char* argv[])
{
const int cBuckets = 256;
int aHisto[4][cBuckets] = { 0 };
int aHistoOld[4][cBuckets] = { 0 };
int aMax[4] = { 0 };
int cElem = 36*36*36*36;
memcpy(a[0], "0000", 4);
for (int iElem=1 ; iElem<cElem ; ++iElem) {
auto& b = a[iElem];
memcpy(b,a[iElem-1], 4);
int i = 3;
while (1) {
b[i]++;
if (b[i] == '9' + 1) {
b[i] = 'A';
} else if (b[i] == 'Z' + 1) {
b[i] = '0';
--i;
} else {
break;
}
}
}
int fMask = cBuckets - 1;
int ah[4] = {0}, ahOld[4] = {0}, aSame[4] = {0},aSameOld[4] = {0};
for (int iElem=0 ; iElem<cElem ; ++iElem) {
int h, hOld, n, cb;
cb = 0;
CFixLenKey<1> A(a[iElem]);
if ((h = A.UlHash()) == ah[cb] && memcmp(a+iElem, a+iElem-1,cb))
aSame[cb]++;
if ((hOld = A.UlHashOld()) == ahOld[cb] && memcmp(a+iElem, a+iElem-1,cb))
aSameOld[cb]++;
ahOld[cb] = hOld; ah[cb]=h;
aHisto[cb][h&fMask]++;
aHistoOld[cb][hOld&fMask]++;
cb = 1;
CFixLenKey<2> B(a[iElem]);
if ((h = B.UlHash()) == ah[cb] && memcmp(a+iElem, a+iElem-1,cb))
aSame[cb]++;
if ((hOld = B.UlHashOld()) == ahOld[cb] && memcmp(a+iElem, a+iElem-1,cb))
aSameOld[cb]++;
ahOld[cb] = hOld; ah[cb]=h;
aHisto[cb][h&fMask]++;
aHistoOld[cb][hOld&fMask]++;
cb = 2;
CFixLenKey<3> C(a[iElem]);
if ((h = C.UlHash()) == ah[cb] && memcmp(a+iElem, a+iElem-1,cb))
aSame[cb]++;
if ((hOld = C.UlHashOld()) == ahOld[cb] && memcmp(a+iElem, a+iElem-1,cb))
aSameOld[cb]++;
ahOld[cb] = hOld; ah[cb]=h;
aHisto[cb][h&fMask]++;
aHistoOld[cb][hOld&fMask]++;
}
for (int cb=0 ; cb<4 ; ++cb) {
int nMin=aHisto[cb][0], nMax = nMin;
for (int i=0 ; i<fMask ; ++i) {
if (aHisto[cb][i] < nMin)
nMin = aHisto[cb][i];
if (aHisto[cb][i] > nMax)
nMax = aHisto[cb][i];
}
int nMinOld=aHistoOld[cb][0], nMaxOld = nMinOld;
for (int i=0 ; i<fMask ; ++i) {
if (aHistoOld[cb][i] < nMinOld)
nMinOld = aHistoOld[cb][i];
if (aHistoOld[cb][i] > nMaxOld)
nMaxOld = aHistoOld[cb][i];
}
printf("%d %03d %03d %03d %03d Same: %04d %04d\n", cb, nMin, nMax, nMinOld, nMaxOld, aSame[cb], aSameOld[cb]);
//for (int i=0 ; i<fMask ; ++i) {
// printf("%03d ", aHisto[cb][i]);
// if ((i&31) == 31)
// printf("\n");
//}
printf("\n");
}
return 0;
}