I am trying to write a function for use in Google Sheets that compares the string in one cell against the values in a range of other cells and returns the string with the lowest Levenshtein distance. For example:
| A | B
---------------------------------
1 | Nick Cheetah Zachary Lavalle
2 | Hello Kitty
3 | James Franco
4 | Nicholas Cheetah
The function I am trying to write in Google Sheets would have the form
=MyFunction(A1,B1:B4)
and would return "Nicholas Cheetah"
I have the code for computing the Levenshtein distance from another sub, but where I am running into issues is storing the distance for every value in a range and then returning the value with the lowest distance.
function Levenshtein(a, b) {
if(a.length == 0) return b.length;
if(b.length == 0) return a.length;
// swap to save some memory O(min(a,b)) instead of O(a)
if(a.length > b.length) {
var tmp = a;
a = b;
b = tmp;
}
var row = [];
// init the row
for(var i = 0; i <= a.length; i++){
row[i] = i;
}
// fill in the rest
for(var i = 1; i <= b.length; i++){
var prev = i;
for(var j = 1; j <= a.length; j++){
var val;
if(b.charAt(i-1) == a.charAt(j-1)){
val = row[j-1]; // match
} else {
val = Math.min(row[j-1] + 1, // substitution
prev + 1, // insertion
row[j] + 1); // deletion
}
row[j - 1] = prev;
prev = val;
}
row[a.length] = prev;
}
return row[a.length];
Code so far:
function comp_distance(a,b) {
a = "Nick Cheetah"
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Agent_Info"); //define spreadsheet name
var last_row = sheet.getLastRow();
var range = sheet.getRange(1,1,last_row)
var values = range.getValues();
var new_array = new Array();
var lev_array = new Array();
for (var i=0; i < values.length; i++) {
new_array[i] = range[i]
}
for (var i=0; i < new_array.length; i++) {
lev_array[i] = Levenshtein(a,new_array[i])
}
Logger.log(values.length)
Logger.log(new_array)
}
The code is erroring out when trying to run the Levenshtein function on the values. However, I think my approach to the problem is wrong, as I will subsequently need to find the lowest distance between the values and return that in the function.
Any advice or help is appreciated!