I wrote a function that is given 2 strings, s and a and it is supposed to return position of the first occurrence of a in s. It works fine with a as one character, but otherwise it stops working if the first occurrence is after 3rd character in s.
I've already checked if mul and add work, if hash of a is correct and I've reduced bases to 10 and 100 (which are not very good for hashing cause they're not prime) and it worked (on strings of length of 20). This might mean that modulo doesn't work as expected.
function getIndex(s, a) {
// 2 bases and 2 mods to reduce the number of collisions
var base1 = 31337;
var base2 = 31357;
var mod1 = 1e9 + 7;
var mod2 = 1e9 + 9;
//suffix arrays
var hs1 = new Uint32Array(s.length);
var hs2 = new Uint32Array(s.length);
// bases to the power of a.length
var ba1 = 1;
var ba2 = 1;
// hashes for a
var ha1 = 0;
var ha2 = 0;
//operators
var mul = (x, y, mod) => (x * y) % mod;
var add = (x, y, mod) => {
x += y;
if(x >= mod) x -= mod;
if(x < 0) x += mod;
return x;
}
//get hash of a and find value of ba1 and ba2
for(var i = 0; i < a.length; i++) {
ha1 = add(mul(ha1, base1, mod1), a.charCodeAt(i), mod1);
ha2 = add(mul(ha2, base2, mod2), a.charCodeAt(i), mod2);
ba1 = mul(ba1, base1, mod1);
ba2 = mul(ba2, base2, mod2);
}
//make suffix array
var h1 = 0;
var h2 = 0;
for(var i = 0; i < s.length; i++) {
h1 = add(mul(h1, base1, mod1), s.charCodeAt(i), mod1);
h2 = add(mul(h2, base2, mod2), s.charCodeAt(i), mod2);
hs1[i] = h1;
hs2[i] = h2;
}
//Compare hashes of substrings of s (by removing prefix from the current element) with hash of a
for(var i = a.length - 1; i < s.length; i++) {
var h1 = hs1[i];
var h2 = hs2[i];
if(i >= a.length) {
console.log(i, i - a.length, h1);
h1 = add(h1, -mul(hs1[i - a.length], ba1, mod1), mod1);
h2 = add(h2, -mul(hs2[i - a.length], ba2, mod2), mod2);
}
if(h1 == ha1 && h2 == ha2) return i - a.length + 1;
}
return -1;
}
getIndex("abcdefgh", "f") //returns 5
getIndex("abcdefgh", "fg")//returns -1