You would probably want to avoid any iterations on duplicate elements and keep your results array unique. Since any of the iterators of Array.prototype will include each of the elements, they might not be the ideal solution for this. Sometimes plain old loops do the job best ...
(You may also want to expressively escape any special characters in your regular expression).
function wordFrequency(txt) {
var words = txt.split(/[ \.\?!,\*'"]+/),
seen = [];
for (var i = 0; i < words.length; i++) {
var w = words[i],
found = false;
for (var j = 0; j < seen.length; j++) {
if (w === seen[j].text) {
seen[j].size++;
found = true;
break;
}
}
if (!found) seen.push( { text: w, size: 1 } );
}
return seen;
}
(Note that the inner for-loop isn't visited for the first word, so the first word will be pushed to the seen-stack and the inner for-loop will start with the second word compared to the first one. Only words that we haven't seen already are added to the seen-stack, making it an array of unique elements.)
And here is the equivalent using Array.prototype.forEach() and Array.prototype.indexOf(), but we have to add another intermediate results stack for the latter one. So we'll have to add another iteration to produce the final result. (We wouldn't have to do this using Array.prototype.findIndex(), but this is not a standard method.)
function wordFrequency2(txt) {
var words = txt.split(/[ \.\?!,\*'"]+/),
seen = [],
freq = [];
// get frequencies
words.forEach(function (w) {
var idx = seen.indexOf(w);
if (idx >= 0) {
freq[idx]++;
}
else {
seen.push(w);
freq.push(1);
}
});
// produce the results array
var r = [];
seen.forEach(function (w, idx) {
r.push( { text: w, size: freq[idx] } );
});
return r;
}
Putting optimization into account, the first version using explicit loops will be probably performing faster ...