2

I'm trying to remove duplicates from my array and display the number of times that a particular word shows up in the array. I've seen ways to handle this, but I tried the ones I've found, and they're not working. When I enter text such as "this this is a test test," it will return a final sorted list of:

1 - is

1 - a

2 - this

2 - test

Though I will eventually be reversing the order of the array, so that the highest numbers are first in the list, this result is perfect! But, if I change the text up a bit, to something like "this is a test test this," the order goes completely out of whack, as shown here:

1 - this

1 - is

1 - a

1 - this

2 - test

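As you can see, 'test' is counted twice, which is great, but 'this' appears twice in the list, each time with a count of '1'. The code only combines duplicates that are directly next to each other. How do I prevent this, so that every occurrence of a word is counted no matter where it appears?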

Here is my code:

<!DOCTYPE html>
<html>
<body>

<p>Click the button to display the array values after the split.</p>

<button onclick="analyze()">Analyze</button>

<p id="displayText"></p>

<script>
/**
 * Sort comparator for strings of the form "<count> - <word>".
 * Orders entries ascending by their leading integer count.
 *
 * @param {string} a - First entry, e.g. "2 - test".
 * @param {string} b - Second entry.
 * @returns {number} Negative if a's count is smaller, positive if it is
 *     larger, and 0 when both counts are equal.
 */
function compareWordCount(a, b) {
  // Returning the difference yields 0 for ties. The previous version returned
  // 1 for both (a, b) and (b, a) when the counts were equal, which is an
  // inconsistent comparator and makes the resulting order engine-dependent.
  // The explicit radix forces the count to be parsed as decimal.
  return parseInt(a, 10) - parseInt(b, 10);
}

/**
 * Splits a fixed sample sentence into words and writes a report into the
 * element with id "displayText": the raw word list, the word count, each word
 * on its own line, the run counts in encounter order, and finally the run
 * counts sorted with compareWordCount.
 *
 * Counting is per run of identical adjacent words: a word that shows up again
 * after a different word starts a new run and gets its own "<count> - <word>"
 * entry.
 */
function analyze() {
    var output = document.getElementById("displayText");
    var sentence = "this is a test test this";
    var tokens = sentence.split(" ");
    var words = [];
    var wordsWithCount = [];

    // Appends one HTML fragment to the report.
    function emit(html) {
        output.innerHTML += html;
    }

    // Records a finished run on the page and in the list that is sorted later.
    function flushRun(count, word) {
        emit("<br/><br/>" + count + " - " + word + "<br/>");
        wordsWithCount.push(count + " - " + word);
    }

    // Assigning the array relies on its default comma-joined string form.
    output.innerHTML = tokens;
    emit("<br/><br/>The amount of words is: " + tokens.length + "<br/><br/><br/>");
    emit("The list of words:<br/><br/>");

    tokens.forEach(function (token) {
        words.push(token);
        emit(token + "<br/><br/>");
    });

    // Run-length pass: extend the current run while the word repeats,
    // otherwise flush the finished run and start a new one.
    var runWord = null;
    var runLength = 0;
    words.forEach(function (word) {
        if (word === runWord) {
            runLength += 1;
            return;
        }
        if (runLength > 0) {
            flushRun(runLength, runWord);
        }
        runWord = word;
        runLength = 1;
    });

    // The last run is still pending once the loop ends.
    if (runLength > 0) {
        flushRun(runLength, runWord);
    }

    wordsWithCount.sort(compareWordCount);

    emit("<br/><br/><br/><br/><br/>The list of SORTED words:<br/><br/>");
    wordsWithCount.forEach(function (entry) {
        emit(entry + "<br/><br/>");
    });
}
</script>

</body>
</html>
Ryan
  • 511
  • 1
  • 8
  • 18
  • Possible duplicate of [Count unique elements in array without sorting](http://stackoverflow.com/questions/15052702/count-unique-elements-in-array-without-sorting) – GSerg May 10 '17 at 22:06
  • And in this too: http://stackoverflow.com/questions/11649255/how-to-count-the-number-of-occurrences-of-each-item-in-an-array – funcoding May 10 '17 at 22:33

4 Answers

0

Here is a possible solution using for and Array.prototype.forEach():

// Counts how often each space-separated word occurs in `str`.
// `res` ends up mapping each distinct word to its number of occurrences.
var str = "One Two Three One One Three Two One Two Three",
    arr = str.split(' '),
    res = {},
    nb = 0;

for (var i = 0; i < arr.length; i++) {
  // Skip words that are already tallied. An own-property check is used rather
  // than a truthiness test on res[word]: the latter also sees inherited
  // members (e.g. "constructor"), so such words would never be stored.
  if (Object.prototype.hasOwnProperty.call(res, arr[i])) {
    continue;
  }
  // First time this word is seen: count all of its occurrences in one scan.
  nb = 0;
  arr.forEach(function (item) {
    if (item === arr[i]) {
      nb++;
    }
  });
  res[arr[i]] = nb;
}

console.log(res);
Badacadabra
  • 8,043
  • 7
  • 28
  • 49
0

Try using an object to store the counts for each word:

    // Tally every space-separated word, print the tallies in first-seen order,
    // then print them again sorted from the highest count to the lowest.
    var str = "this is a test test this";
    var words = str.split(" ");
    var wordsWithCount = {};
    var hasOwn = Object.prototype.hasOwnProperty;

    for (var i = 0; i < words.length; i++) {
        var word = words[i];
        // Own-property check: the `in` operator also matches inherited names
        // such as "constructor" or "toString", which would then be
        // incremented into NaN instead of starting at 1.
        if (hasOwn.call(wordsWithCount, word)) {
            wordsWithCount[word]++;
        } else {
            wordsWithCount[word] = 1;
        }
    }

    console.log("WORD COUNTS");

    Object.keys(wordsWithCount).forEach(function (key) {
        console.log(key + " - " + wordsWithCount[key]);
    });

    // Objects cannot be sorted, so copy the tallies into [word, count] pairs.
    var sortable = Object.keys(wordsWithCount).map(function (key) {
        return [key, wordsWithCount[key]];
    });

    // Descending by count.
    sortable.sort(function(a, b) {
        return b[1] - a[1];
    });

    console.log("SORTED WORD COUNTS");

    sortable.forEach(function (entry) {
        console.log(entry[0] + " - " + entry[1]);
    });
jmhummel
  • 109
  • 1
  • 7
  • Can I sort by `parseInt` like I've done for the array? – Ryan May 10 '17 at 22:34
  • Or can I send the data to an array from the object? – Ryan May 10 '17 at 22:35
  • @Ryan I added code to sort by count from highest to lowest – jmhummel May 10 '17 at 22:58
  • Awesome, thank you! Here's the other problem, however... The first word that it evaluates comes out as a '1,' and is shown again in the list (if used again) with the remaining quantity. For example, 'one one two one two three two' displays the following result: `one - 1` `one - 2` `two - 3` `three - 1` How do I get the ones to combine? – Ryan May 11 '17 at 00:00
  • Not sure what you mean, Ryan. I just tested my code on the input `"one one two one two three two"` and got the result: `one - 3` `two - 3` `three - 1` – jmhummel May 11 '17 at 04:01
0

I do believe GSerg is correct about this being a duplicate, but to help you understand the logic of what's happening in your code, here's what your code is actually doing, in words:

First you're taking your string "this is a test test this" and splitting it into an array: words = ["this", "is", "a", "test", "test", "this"]

On each iteration of the loop, you're comparing the current word to the previous word. If the current word is the same word as the previous word, you increase your counter. If the current word is not the same as your previous word, you're adding the previous word to the end of the wordsWithCount array.

This goes well until you get to the second "this" in your array. At that point you compare "this" to the previous word, "test", and find that they're not the same word, so you start a whole new counter for the second "this". That new counter is also added to the end of the wordsWithCount array instead of being added to the existing counter for "this".

To prevent this, you could use a key-value structure instead of a normal array:

// Tally each entry of `words` under its own key in `wordsWithCount`.
for (var i = 0; i < words.length; i++) {
    // Own-property check: a plain `== undefined` lookup also sees inherited
    // members (e.g. "constructor"), which would be incremented into NaN
    // instead of starting at 1.
    if (Object.prototype.hasOwnProperty.call(wordsWithCount, words[i])) {
        wordsWithCount[words[i]]++;
    } else {
        wordsWithCount[words[i]] = 1;
    }
}

Which will give you this structure:

wordsWithCount: {
    'this': 2,
    'is': 1,
    'a': 1,
    'test': 2
}

And you can loop through the structure as well to create the formatted list you had mentioned:

// Build one "<count> - <word>" string per tallied word. Object.keys visits
// only the object's own keys, whereas an unguarded for...in would also pick
// up enumerable properties inherited through the prototype chain.
var formattedWordsWithCount = Object.keys(wordsWithCount).map(function (word) {
    return wordsWithCount[word] + " - " + word;
});
D. R.
  • 324
  • 1
  • 10
0

Here is a solution that will ignore case and punctuation.

/**
 * Normalizes a raw token: lower-cases it and keeps only its first run of
 * word characters (letters, digits, underscore as matched by \w).
 *
 * @param {string} word - Raw token, possibly with punctuation attached.
 * @returns {string|undefined} The normalized word, or undefined when the
 *     token contains no word characters at all.
 */
function sanitizeWord(word) {
  const match = word.toLowerCase().trim().match(/\w+/);
  return match === null ? undefined : match[0];
}

/**
 * Counts how often each word occurs in a string.
 *
 * The string is split on whitespace, each token is normalized with
 * sanitizeWord, and tokens for which sanitizeWord returns a falsy value
 * are skipped.
 *
 * @param {string} str - Text to analyze.
 * @returns {Object<string, number>} Map from normalized word to its count.
 */
function analyze(str) {
  const result = {};
  // Split on any run of whitespace. Splitting on a single space left words
  // separated by tabs or newlines glued into one token, of which only the
  // first word survived sanitizing.
  str.split(/\s+/).forEach((token) => {
    const word = sanitizeWord(token);
    if (!word) {
      return;
    }
    // Call hasOwnProperty via the prototype so the check does not depend on
    // whatever is stored on `result` itself.
    if (Object.prototype.hasOwnProperty.call(result, word)) {
      result[word] += 1;
    } else {
      result[word] = 1;
    }
  });
  return result;
}

// Demo call: for this sample sentence the logged counts are
// { this: 3, is: 1, a: 1, test: 2 } (the lone "." token is skipped).
console.log(analyze("This, . is a test! test this this"));
Thomas Powell
  • 115
  • 1
  • 12
  • I will be needing to ignore case and punctuation, so that's great! But how do I call those methods? How would I implement this into my code? – Ryan May 10 '17 at 23:50
  • I actually used a different method. Just replaced any possible punctuation character, and called `toLowerCase()`. Thank you! – Ryan May 11 '17 at 00:19