I have wrote this function that aims to replace words or phrases in a text
document with a specified expression expr
given a set of tokens
to be matched.
The document is newline formatted.
function replaceTokens(text, tokens, expr, isline = false) {
tokens.forEach(word => {
if (expr[token]) {
if (isline) { // line regex
text = text.replace(new RegExp("(" + word.replace(/([\(\)'?*!"])/g, "\\$1") + ")", "gi"), expr);
} else {
text = text.replace(new RegExp("(" + word + ")", "gi"), expr[token]);
}
}
});
return text;
}
I'm facing two problems.
1) For words tokens like Lorem
, qui
, etc. it works pretty ok, but I cannot get rid of the whole token case i.e. I do not want to match qui
within a word like quis
, but only the given token in the text. Using ^word$
does not work here with capture group ^(word)$
[1 - SOLVED] according to first answer with new RegExp("\\b(" + word + ")\\b", "gi")
2) For phrases tokens, the regex I'm using does not work properly.
I want to match the exact line like Lorem ipsum dolor sit amet
in
Lorem ipsum dolor sit amet
Lorem ipsum dolor sit amet etwas
it should match the first line only, not the second line as well.
Here is an example. For (1) you can see how qui
is captured as a token and within the word quis
or aliquip
.
function replaceTokens(text, tokens, expr, isline = false) {
tokens.forEach(word => {
if (isline) { // line regex
text = text.replace(new RegExp("(" + word.replace(/([\(\)'?*!"])/g, "\\$1") + ")", "gi"), expr);
} else {
text = text.replace(new RegExp("\\b(" + word + ")\\b", "gi"), expr);
}
});
return text;
}
text = "Lorem ipsum dolor sit amet,\n consectetur adipiscing elit,\nsed do eiusmod tempor incididunt\nut labore et dolore magna aliqua.\nUt enim ad minim veniam,\nquis nostrud exercitation ullamco laboris nisi\nut aliquip ex ea commodo consequat.\nDuis aute irure dolor in reprehenderit in voluptate velit esse\ncillum dolore eu fugiat nulla pariatur.\nExcepteur sint occaecat cupidatat non proident,\nLorem ipsum dolor sit amet etwas,\nsunt in culpa qui officia deserunt mollit anim id est laborum"
out = replaceTokens(text, ["Lorem", "ut", "qui"], "<strong>$1</strong>", false)
out_phrases = replaceTokens(text, ["Lorem ipsum dolor sit amet", "Duis aute irure dolor in reprehenderit"], "<strong>$1</strong>", true)
document.getElementById("in_text").innerHTML = text.replace(/\n/g, '<br/>')
document.getElementById("out_text").innerHTML = out.replace(/\n/g, '<br/>')
document.getElementById("out_phrases").innerHTML = out_phrases.replace(/\n/g, '<br/>')
<div id="in_text"></div>
<hr>
<div id="out_text"></div>
<hr>
<div id="out_phrases"></div>
Addded jsfiddle snippet to try it out.