I have to loop over a list of words and match them against a regex and I'm getting too many matches (looking for ALPHA also matches ALPHABET for example). Here is my script:
function evaluateChunk(my_chunk, my_input) {
var rows = my_chunk.split(LINE_BREAKS).filter(Boolean),
word_string = my_input.replace(/ /g, "|"),
re = new RegExp("(" + word_string + ")(?:\\([0-9]\\))?"),
output_dict = {"error_list": [], "match_dict": {}},
row_len = rows.length,
candidate,
j;
for (j = 0; j < row_len; j += 1) {
candidate = rows[j].split(" ")[0];
if (candidate.match(re) !== null) {
output_dict.match_dict[candidate] = rows[j].split("]").pop().trim();
}
}
output_dict.error_list = word_string.split("|").reduce(function (arr, word) {
if (output_dict.match_dict[word] === undefined) {
arr.push(word);
}
return arr;
}, []);
return output_dict;
}
my_input will be something like ALPHA BETA
, which will be converted to ALPHA|BETA
and put into the regular expression (which needs to catch both ALPHA
AND ALPHA(2)
hence the regex). Rows is a chunk from a dictionary I'm trying to look inputs up from.
My issue is that the regular expressions should:
ALPHA => match (ok)
ALPHA(2) => match (ok)
ALPHABET => no match (doesn't work, is also returned)
ALPHANUMERIC => no match (doesn't work, is also returned)
Question:
How do I make a regular expression non greedy to only return exact word matches and no words with the same ... root?