The average GET operation should be under 1ms ranging from 189ns with 1024 entries (349KB in memory) to 888ns for 27,648 entries (6MB in memory). The maximum latency on an entry with 27k entries is 44,000ns. But, if the average time is what matters for you and not a high latency ever so often then this might be basically what you want. I think it can be optimized further, but not sure of the gains to be made.
typedef unsigned int uintptr;
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;
namespace anything { namespace linklist {
typedef struct _HDR {
void *next;
void *prev;
} HDR;
void *next(void *ptr) {
if (ptr == 0) {
return 0;
}
return ((void**)ptr)[0];
}
void add(void **chain, void *toadd) {
((void**)toadd)[0] = *chain;
((void**)toadd)[1] = 0; /* set previous */
/* set previous link if valid pointer */
if (*chain)
((void**)*chain)[1] = toadd;
*chain = toadd;
}
}}
namespace anything{ namespace hash {
typedef struct _B {
MASS_LL_HDR llhdr;
uint32 id;
union {
struct _B *chain;
uintptr value;
};
} B;
typedef struct _HT {
B *buckets;
uint16 depth;
uint8 bbl;
} HT;
void init(HT *ht, uint8 bbl) {
ht->buckets = 0;
ht->bbl = bbl;
}
void _free(B **chain, uint16 dcnt, uint16 dcntmax, uint32 *_m) {
B *ba, *_ba;
for (ba = *chain, _ba = 0; ba; ba = _ba) {
_ba = (B*)mass_ll_next(ba);
if (dcnt < dcntmax - 1) {
_free(&ba->chain, dcnt + 1, dcntmax, _m);
*_m = *_m + 1;
dfree(ba);
}
}
/* zero the chain out */
*chain = 0;
}
void free(HT *ht) {
uint32 m;
uint16 dm;
dm = (sizeof(uintptr) * 8) / ht->bbl;
m = 0;
_free(&ht->buckets, 0, dm, &m);
}
int get(HT *ht, uintptr k, uintptr *v) {
uintptr a;
B *ba, **cur;
uint16 bi, lcnt;
uint32 mask;
lcnt = (sizeof(uintptr) * 8) / ht->bbl;
cur = &ht->buckets;
mask = ~(~0 << ht->bbl);
for (bi = 0; bi < lcnt; ++bi) {
a = (k >> (bi * ht->bbl)) & mask;
for (ba = *cur; ba; ba = (B*)mass_ll_next(ba)) {
if (ba->id == a) {
break;
}
}
if (!ba) {
return 0;
}
cur = &ba->chain;
}
*v = ba->value;
return 1;
}
void put(HT *ht, uintptr k, uintptr v) {
uintptr a;
B *ba, **cur;
uint16 bi, lcnt;
uint32 mask;
lcnt = (sizeof(uintptr) * 8) / ht->bbl;
cur = &ht->buckets;
mask = ~(~0 << ht->bbl);
for (bi = 0; bi < lcnt; ++bi) {
a = (k >> (bi * ht->bbl)) & mask;
for (ba = *cur; ba; ba = (B*)mass_ll_next(ba)) {
if (ba->id == a) {
break;
}
}
if (!ba) {
ba = (B*)dmalloc(sizeof(B));
ba->id = a;
ba->chain = 0;
mass_ll_add((void**)cur, ba);
}
cur = &ba->chain;
}
ba->value = v;
}
}}
anything::hash::HT ht;
anything::hash::init(&ht, 1);
anything::hash::put(&ht, key, value);
if (!anything::hash::get(&ht, key, &value) {
printf("not found!\n");
}
You can reduce the memory usage to around 900kb per 15000 entries using anything::hash::init(&ht, 4), but this increases the latency.