Problem
I'm trying to implement some sort of "fuzzy search" in my Node.js based project.
Fuzzy search is a search that returns results even if the string didn't match exactly.
I found this code in another stackoverflow thread. Code is below.
It's quite good, but the problem is - it's synchronous It slows down the whole program when it searches through a large array.
Question
ES6 methods are welcomed. Needs to work only in the latest Chrome, so any JS methods will work.
Are there any new JS methods that would optimize this function?
Am I doing something wrong there that makes it even slower?
Can I turn this function into an async function that returns a promise? Will it stop freezing the app during the search then?
Are there any better "fuzzy search" implementations you know of? (I found a module called fuzzysort, can't say if it's that much better though, it won't return
"folder test"
if you type"test folder"
(wrong order) so it's not that good)
Code
Calling search function
searchArray is an array of paths it searches through, e.g.: ["C:\\test", "C:\\file.txt"...]
(0.5 - 5 million paths)
searchQuery is a string without spaces, e.g.: filetxt
search () {
const fuzzySearch = this.fuzzySearch(this.searchQuery.toLowerCase(), this.searchArray)
let result = fuzzySearch.filter(element => element.relevance >= 0.3)
// sort by relevance
var sortedResults = result.sort((a, b) => parseFloat(b.relevance) - parseFloat(a.relevance)).map(item => item.name);
this.searchResults = sortedResults
},
The fuzzy search function
fuzzySearch (searchQuery, searchArray) {
const get_bigrams = function(string) {
const s = string.toLowerCase();
const v = new Array(s.length - 1);
for (let i = 0, end = v.length; i <= end; i++) {
v[i] = s.slice(i, i + 2);
}
return v;
};
const string_similarity = function(str1, str2) {
if ((str1.length > 0) && (str2.length > 0)) {
const pairs1 = get_bigrams(str1);
const pairs2 = get_bigrams(str2);
const union = pairs1.length + pairs2.length;
let hit_count = 0;
for (let x of Array.from(pairs1)) {
for (let y of Array.from(pairs2)) {
if (x === y) {
hit_count++;
}
}
}
if (hit_count > 0) {
return ((2.0 * hit_count) / union);
}
}
return 0.0;
};
let results = [];
for (let name of searchArray) {
// I added .match to use only the base filename (name+ext) not the whole path, and removed all characters
let filteredPath = name.match(/[^\\\/]+$/)[0].replace(/[^A-Za-z0-9.]+/g, '')
const relevance = string_similarity(searchQuery, filteredPath);
const obj = {name, relevance};
results.push(obj);
}
return results
},