My solution uses replace() with word boundaries (\b) in the pattern and the global modifier g.
The advantage of replace() is that a callback function can be passed as the replacement. I hope you like it; I found it very interesting, as I haven't done much with JS yet. So please correct any errors you find :)
// Demo fixtures for trying out the highlighter
var WordsToMatch = ['fox', 'dog'];
// highlight cap handed to highlight(); 0 means no limit
var MaxHighlights = 2;
var TestStr = [
    'The quick brown fox jumps over the lazy dog but the lazy dog is ',
    'quick of the mark to catch the brown fox. In general the ',
    'fox versus the dog is not a good match.'
].join('');
// Write the demo text to the page with the matched words wrapped in <b> tags
document.write(highlight(TestStr, WordsToMatch, MaxHighlights));
// --- JOHNNY 5's WORD HIGHLIGHTER ---
/**
 * Wrap whole-word, case-insensitive occurrences of the given words in <b> tags.
 *
 * Each word is matched literally (regex metacharacters are escaped) and all
 * words are located in one single pass over the text, so markup inserted for
 * one word is never re-scanned while looking for another. When two words could
 * match at the same position, the one listed first in `words` wins.
 *
 * @param {string} str     Text to search. No HTML escaping is performed.
 * @param {Array}  words   Words to highlight; empty entries are ignored.
 * @param {number} [limit] Maximum number of highlights per word. 0, a negative
 *                         value, or a missing/non-numeric value means no limit.
 * @returns {string} str with the selected matches wrapped in <b>...</b>.
 */
function highlight (str, words, limit)
{
    // Normalise the cap: anything that is not a positive number means "no limit"
    var max = Number(limit);
    if (!(max > 0)) {
        max = 0;
    }

    // One capture group per word, so the callback can tell which word matched
    var groups = [];
    var counts = [];
    for (var i = 0; i < words.length; i++) {
        var word = String(words[i]);
        if (word === '') {
            // an empty word would match at every word boundary
            continue;
        }
        // escape regex metacharacters so the word is matched literally
        groups.push('(' + word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + ')');
        counts.push(0);
    }
    if (groups.length === 0) {
        return str;
    }

    // match any word case insensitive using word-boundaries
    var pattern = new RegExp('\\b(?:' + groups.join('|') + ')\\b', 'gi');

    return str.replace(pattern, function (match) {
        // arguments[1..n] are the capture groups; exactly one of them is set
        for (var g = 0; g < counts.length; g++) {
            if (arguments[g + 1] !== undefined) {
                counts[g]++;
                return (max === 0 || counts[g] <= max) ? '<b>' + match + '</b>' : match;
            }
        }
        return match;
    });
}
The callback function will return highlighted matches as replacement until the limit is reached.
Output:
The quick brown <b>fox</b> jumps over the lazy <b>dog</b> but the lazy <b>dog</b> is quick of the mark to catch the brown <b>fox</b>. In general the fox versus the dog is not a good match.
EDIT: And now I see, there are extra points available...
For extra points I'd preferably only highlight one match per sentence.
That was a bit more challenging, and I hope it works as it should in most cases. It's not so trivial to determine what a sentence is. I decided to keep it simple and treat the split sequence as a definable punctuation mark (var sep_punct), followed by one or more whitespace characters, provided an uppercase letter or a digit comes next.
// Demo fixtures: the words to look for and the text to search in
var WordsToMatch = ['fox', 'dog'];
var TestStr = [
    'The quick brown fox jumps over the lazy dog but the lazy dog is ',
    'quick of the mark to catch the brown fox. In general the ',
    'fox versus the dog is not a good match.'
].join('');
// --- JOHNNY 5's FIRST WORD IN SENTENCE HIGHLIGHTER ---
/**
 * Highlight the first occurrence of each word in every sentence by wrapping
 * it in <b> tags. Matching is whole-word and case-insensitive.
 *
 * A sentence break is one of the characters . ; ? ! followed by one or more
 * whitespace characters, but only when an upper-case ASCII letter or a digit
 * comes next. The separators are put back unchanged when the text is rejoined.
 *
 * Each word is matched literally (regex metacharacters are escaped) and all
 * words are located in a single pass per sentence, so markup inserted for one
 * word is never re-scanned while looking for another. When two words could
 * match at the same position, the one listed first in `words` wins.
 *
 * @param {string} str   Text to search. No HTML escaping is performed.
 * @param {Array}  words Words to highlight; empty entries are ignored.
 * @returns {string} str with the first match of each word per sentence
 *                   wrapped in <b>...</b>.
 */
function higlight_first_w_in_sentence(str, words)
{
    // One capture group per word, so the callback can tell which word matched
    var groups = [];
    for (var i = 0; i < words.length; i++) {
        var word = String(words[i]);
        if (word === '') {
            // an empty word would match at the first word boundary
            continue;
        }
        // escape regex metacharacters so the word is matched literally
        groups.push('(' + word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + ')');
    }
    if (groups.length === 0) {
        return str;
    }

    // split-sequence: punctuation, followed by one or more whitespaces,
    // looking ahead for an upper letter or digit
    var sep_punct = '[.;?!]';
    var breakPattern = new RegExp(sep_punct + "\\s+(?=[A-Z0-9])", "g");
    // match() yields null when there is no break at all
    var sep = str.match(breakPattern) || [];
    // the same pattern drives both calls, so snt.length === sep.length + 1
    var snt = str.split(breakPattern);

    var wordPattern = new RegExp('\\b(?:' + groups.join('|') + ')\\b', 'gi');

    // Highlight only the first hit of every word inside one sentence
    function markFirstHits(sentence) {
        var seen = [];
        return sentence.replace(wordPattern, function (match) {
            // arguments[1..n] are the capture groups; exactly one of them is set
            for (var g = 0; g < groups.length; g++) {
                if (arguments[g + 1] !== undefined) {
                    if (seen[g]) {
                        return match;
                    }
                    seen[g] = true;
                    return '<b>' + match + '</b>';
                }
            }
            return match;
        });
    }

    // rejoin the sentences with their original separators
    var ret = "";
    for (var k = 0; k < snt.length; k++) {
        if (k > 0) {
            ret += sep[k - 1];
        }
        ret += markFirstHits(snt[k]);
    }
    return ret;
}
// Write the demo text to the page with the first match of each word per sentence highlighted
document.write(higlight_first_w_in_sentence(TestStr, WordsToMatch));
Output:
The quick brown <b>fox</b> jumps over the lazy <b>dog</b> but the lazy dog is quick of the mark to catch the brown fox. In general the <b>fox</b> versus the <b>dog</b> is not a good match.