i have to implement the data structure 'Linear Hashing' for my algorithms course, but i ran into some problems. I have implemented the add-function straight forward. I have to use the following hashfunction:
h(x) = x mod 2^d
My testvalues are:
24200, 16448, 16432, 4044, 24546, 10556, 29009, 25270, 32579, 13047
When i add this values to my data structure, i get the following:
LinearHashing [ TableSize=4 BucketSize=2 d=2 nextToSplit=0 ]
Elemente
values=
Bucket[0]
[24200] [16448]
Overflow Bucket:
[16432] [4044]
[24546] [10556]
[25270] [0]
Bucket[1]
[29009] [0]
Bucket[2]
[0] [0]
Bucket[3]
[32579] [13047]
But the value "25270" ends up in the wrong bucket, I have rechecked it a thousand times and i cant find my mistake. The bug is fixed when i set the Overflowbucketsize to "N/2" but this might be just a coincidence. The example should look like this(if i got the algorithm right):
LinearHashing [ TableSize=4 BucketSize=2 d=2 nextToSplit=0 ]
Elemente
values=
Bucket[0]
[24200] [16448]
Overflow Bucket:
[16432] [4044]
[24546] [10556]
[0] [0]
Bucket[1]
[29009] [0]
Bucket[2]
[25270] [0]
Bucket[3]
[32579] [13047]
I have to test my data structure with this testprogramm:
and my data structure has to based on this *.h file which was provided by my professor:
and this is my complete code:
The most important functions are the following. My add-function:
template <typename E, size_t N>
void LinearHashing<E,N>::add(const E e[], size_t len) {
for(size_t i=0; i < len; ++i){
size_t address = compIndex(e[i]);
for(size_t j=0; j < N; ++j){
if(table[address].bucketstatus[j] == leer){
table[address].bucketElem[j] = e[i];
table[address].bucketstatus[j] = besetzt;
break;
}else if(j == (N-1) && table[address].newoverflow == nullptr){
table[address].newoverflow = new OverflowContainer();
addToOverflow(e[i],table[address].newoverflow);
split();
reHash(nts);
if(nts+1 == (pow(2, d))){
nts=0;
++d;
}else{
++nts;
}
}else if(j == (N-1) && table[address].newoverflow != nullptr){
addToOverflow(e[i],table[address].newoverflow);
}
}
}
}
The index of the element is calculated with the following two functions:
hashValue:
template <typename E> inline size_t hashValue(const E& e) { return size_t(e); }
compIndex:
template <typename E, size_t N>
inline size_t LinearHashing<E, N>::compIndex(E e){
size_t adrOP = powl(2,d);
return hashValue(e) % adrOP;
}
Overflows are added by the following function:
template <typename E, size_t N>
void LinearHashing<E, N>::addToOverflow(E e, LinearHashing::OverflowContainer *container) {
size_t i=0;
while( i < N/2){
if(container->overflowstatus[i] == leer){
container->overflowbucket[i] = e;
container->overflowstatus[i] = besetzt;
break;
}else if(i == (N/2)-1 && container->nextOverflow == nullptr){
container->nextOverflow = new OverflowContainer();
container = container->nextOverflow;
split();
reHash(nts);
if(nts+1 == (pow(2, d))){
nts=0;
++d;
}else{
++nts;
}
i=0;
}else if(i == (N/2)-1 && container->nextOverflow != nullptr){
container = container->nextOverflow;
i=0;
}else{
++i;
}
}
}
To perform a split i use this function:
template <typename E, size_t N>
void LinearHashing<E,N>::split(){
Bucket* tmp = new Bucket[size()+1];
std::copy(table,table+size(),tmp);
delete[] table;
table = tmp;
++n;
}
For now i perform a split when a new Overflow is created, to rehash the nextToSplit line i use the following rehash-functions:
template <typename E, size_t N>
void LinearHashing<E, N>::reHash(size_t index){
bool reHashed = false;
for(size_t i = 0; i < N; ++i){
if(compIndex(table[index].bucketElem[i]) != compReIndex(table[index].bucketElem[i]) && table[index].bucketstatus[i] == besetzt){
for (size_t j = 0; j < N && !reHashed ; ++j) {
if(table[n-1].bucketstatus[j] == leer){
table[n-1].bucketElem[j] = table[index].bucketElem[i];
table[n-1].bucketstatus[j] = besetzt;
table[index].bucketstatus[i] = leer;
table[index].bucketElem[i] = 0;
reHashed = true;
}
}
reHashed = false;
}
}
if(table[index].newoverflow != nullptr){
reHashOverflow(table[index].newoverflow);
}
}
template <typename E, size_t N>
void LinearHashing<E, N>::reHashOverflow(OverflowContainer* container) {
size_t i=0;
bool reHashed = false;
while(i < (N/2)){
if(compIndex(container->overflowbucket[i]) != compReIndex(container->overflowbucket[i]) && container->overflowstatus[i] == besetzt){
for(size_t j=0; j < N && !reHashed; ++j){
if(table[n-1].bucketstatus[j] == leer){
table[n-1].bucketElem[j] = container->overflowbucket[i];
table[n-1].bucketstatus[j] = besetzt;
container->overflowbucket[i] = 0;
container->overflowstatus[i] = leer;
reHashed = true;
}else if(j == N-1 && table[n-1].newoverflow == nullptr){
table[n-1].newoverflow = new OverflowContainer();
addToOverflow(container->overflowbucket[i],table[n-1].newoverflow);
container->overflowbucket[i] = 0;
container->overflowstatus[i] = leer;
reHashed = true;
}else if(j == N-1 && table[n-1].newoverflow != nullptr){
addToOverflow(container->overflowbucket[i], table[n-1].newoverflow);
container->overflowbucket[i] = 0;
container->overflowstatus[i] = leer;
reHashed = true;
}
}
reHashed = false;
++i;
}else{
++i;
}
if(container->nextOverflow){
container = container->nextOverflow;
i = 0;
}
}
}