Calculate the frequency of common characters

Question

How can I go about writing a function that compares two strings and returns an object containing the characters that occur in both strings, and how many times they occur?

Here is some example input and output:

"Leopard", "Lion"    // { "L": 2, "o": 2 }
"Hello",   "World"   // { "l": 3, "o": 2 }
"Flower",  "Power"   // { "o": 2, "w": 2, "e": 2, "r": 2 }
"phone",   "Memphis" // { "p": 2, "h": 2, "e": 2 }

score 5 · Answer 1 · edited Jun 20 '20 at 09:12

If you're working with small strings, there isn't any need to complicate things. The following method is simple, at the expense of performance when dealing with very large strings.

First we need to define an object to hold the counts for our duplicate characters.

Then we need to iterate each string. There are many way of going about iterating a string, but in the following examples I either:

invoke Array#forEach on the string using the Function#call method (ES5 example), or
with a for...of loop (ES6 example).

Both of these methods work because a string is an example of a built-in iterable object.

While iterating the strings, we need to first check if the character exists in the duplicate object.

If it does, we don't need to check it again, just increment the number.
If it does not, scan the other string to see if the character occurs there.
- If it does, increment the duplicate counter for that character.
- If it does not, do nothing.

Using this method, you don't count characters that aren't duplicates, and you don't try to verify characters that you have already verified. This should offer some significant speed improvements with longer strings.

Once you have finished iterating both strings, simply return the duplicate holder object.

ES6 Example

const countLetters = (v1, v2) => {
  const out = {};
  
  for(let char of v1) (out[char] || v2.indexOf(char) !== -1) && (out[char] = (out[char] || 0) + 1);
  for(let char of v2) (out[char] || v1.indexOf(char) !== -1) && (out[char] = (out[char] || 0) + 1);

  return out;
}

const test = (v1,v2) => (console.log(JSON.stringify(countLetters(v1, v2))),test);

test("Leopard", "Lion")            // { "L": 2, "o": 2 }
    ("Hello", "World")             // { "l": 3, "o": 2 }
    ("Flower", "Power")            // { "o": 2, "w": 2, "e": 2, "r": 2 }
    ("phone", "Memphis")           // { "p": 2, "h": 2, "e": 2 }
    ("Bookkeeper", "Zookeeper")    // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
    ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }

ES5 Example

var countLetters = function(v1, v2) {
  var out = {};
  
  Array.prototype.forEach.call(v1, function(char) {
    if(out[char] || v2.indexOf(char) !== -1) out[char] = (out[char] || 0) + 1;
  });
  Array.prototype.forEach.call(v2, function(char) {
    if(out[char] || v1.indexOf(char) !== -1) out[char] = (out[char] || 0) + 1;
  });

  return out;
}

var test = function(v1,v2){ return (console.log(JSON.stringify(countLetters(v1, v2))),test) };

test("Leopard", "Lion")            // { "L": 2, "o": 2 }
    ("Hello", "World")             // { "l": 3, "o": 2 }
    ("Flower", "Power")            // { "o": 2, "w": 2, "e": 2, "r": 2 }
    ("phone", "Memphis")           // { "p": 2, "h": 2, "e": 2 }
    ("Bookkeeper", "Zookeeper")    // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
    ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }

Performance becomes a concern when you start working with very large strings. The following method employs some algorithmic wizardry to gain performance.

First, find the frequency of each letter in both strings. Working with a sorted array is faster than working with an unsorted array, and saves some time counting because we can split the string into groups of common characters instead of counting each character.

I use a Map object to take advantage of the Map#has method—which is much faster than Array#indexOf—in the next function.

// Calculate the frequency of each character in a string
const calculateCharacterFrequency = string => {
   // Split the string into individual characters
    const array = [...string];
    // Sort the split string
    const sorted = array.sort();
    // Join the split string
    const joined = sorted.join("");
    // Split the string into groups of common characters
    const matches = joined.match(/(.)\1*/g);
    // Iterate the groups
    const frequency = matches.reduce(
        (frequency, character) => { 
            // Insert char and frequency into map
            frequency.set(character[0], character.length);
            // return frequency map for use in next iteration
            return frequency;
        },
        // This is the map that will be passed as `frequency`
        new Map()
    );
    // Return the map
    return frequency;
};

Next, find the common characters between each string and create a map containing the frequency of each common character.

The biggest performance gain here is to create a unique set of all of the characters that occur in either string. I utilize the unique nature of a Set object to remove duplicates from the array, there are other ways of going about removing duplicates from an array but that was the fastest one that I tested. This way, we are only iterating one unqiue set and checking that the character occurs in both maps.

// Calculate the frequency of each character two strings have in common
const calculateCommonCharacterFrequency = (v1, v2) => {
    // Get frequency map of first string
    v1 = calculateCharacterFrequency(v1);
    // Get frequency map of second string
    v2 = calculateCharacterFrequency(v2);
    // Create an array containing a list of all characters occuring in either string
    const combinedCharacterArray = [...v1.keys(), ...v2.keys()];
    // Remove duplicate characters
    const combinedUniqueCharacterSet = new Set(combinedCharacters);
    // Convert set back to array
    const combinedUniqueCharacterArray = Array.from(combinedUniqueSet);
    // Iterate the unique array of common characters
    const commonFrequency = combinedUniqueCharacterArray.reduce(
        (commonFrequency, character) => {
            // Check if both strings have the character
            const isCommon = v1.has(character) && v2.has(character);
            if(isCommon) {
                // Find the sum of the frequency of the character in both strings
                const combinedFrequency = v1.get(character) + v2.get(character);
                // Insert character and frequency into map
                commonFrequency.set(character, combinedFrequency);
            }
            // return frequency map for use in next iteration
            return commonFrequency;
        }, 
        // This is the map that will be passed as `commonFrequency`
        new Map()
    );
    // Return the map
    return commonFrequency;
};

Condensed Example

The above example has been expanded for readability and explanation purposes. The following example has been condensed to save space.

const calcCharFreq = string => [...string].sort().join("").match(/(.)\1*/g).reduce((freq, char) => (freq.set(char[0], char.length), freq), new Map());

const calcCommonCharFreq = (v1, v2) => {
    v1 = calcCharFreq(v1);
    v2 = calcCharFreq(v2);
    return Array.from(new Set([...v1.keys(), ...v2.keys()])).reduce((dup, char) => ((v1.has(char) && v2.has(char)) && dup.set(char, v1.get(char) + v2.get(char)), dup), new Map());
};

const test = (v1,v2) => (console.log('{ '+Array.from(calcCommonCharFreq(v1, v2)).reduce((pairs, value) => (pairs.push('"'+value[0]+'": '+value[1]), pairs), []).join(", ")+' }'), test);

test("Leopard", "Lion")            // { "L": 2, "o": 2 }
    ("Hello", "World")             // { "l": 3, "o": 2 }
    ("Flower", "Power")            // { "o": 2, "w": 2, "e": 2, "r": 2 }
    ("phone", "Memphis")           // { "p": 2, "h": 2, "e": 2 }
    ("Bookkeeper", "Zookeeper")    // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
    ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }

Redu · Answer 2 · 2017-01-25T14:24:54.280

This is a very nice question. If I got it right you may easily do in O(2n) as follows;

function relationIndex(s){
  var a = s.split(/\s+/),
   hash = a[0].length < a[1].length ? Array.prototype.reduce.call(a[1], (p,c) => (p[c] && (p[c] = Math.abs(p[c])+1),p), Array.prototype.reduce.call(a[0], (p,c) => (p[c] = --p[c] || -1,p),{}))
                                    : Array.prototype.reduce.call(a[0], (p,c) => (p[c] && (p[c] = Math.abs(p[c])+1),p), Array.prototype.reduce.call(a[1], (p,c) => (p[c] = --p[c] || -1,p),{}));
  for (var k in hash) hash[k] <= 0 && delete hash[k];
  return hash;
}

var str = "Lion Leopard";
console.log(relationIndex(str));
str = "Brontosaurus Butterfly";
console.log(relationIndex(str));
str = "London Paris";
console.log(relationIndex(str));
str = "phone Memphis";
console.log(relationIndex(str));

Russell Jonakin · Answer 3 · 2017-01-25T09:02:58.157

Here is an option that should be FAST (basically because it is not doing anything fancy at all). This will account for character case sensitivity, all lower case a-z chars, all upper case A-Z chars, numeric 0-9 chars, and empty space chars.

here I just use parallel arrays for the ASCII char values of the chars for the first and second words.

var firstString = "hello world";
var secondString = "FOLLOW me";
var firstChars = [];
var secondChars = [];
var arrLength = 122; 
// 0 - 122 covers ASCII values for all chars a-z (lowers), A-Z (uppers), numbers, and empty spaces.

//initialize array to arrLength number of elements, all set to 0.
for (var i = 0; i < arrLength; i++)
{
    firstChars.push(0);
    secondChars.push(0);
}

//count first word chars
for (var i = 0; i < firstString.length; i++)
    firstChars[firstString.charCodeAt(i)]++;

//count second word chars
for (var i = 0; i < secondString.length; i++)
    secondChars[secondString.charCodeAt(i)]++;

//output matching chars and the count of occurences.
for (var i = 0; i < arrLength; i++)
{
    if (firstChars[i] > 0 && secondChars[i] > 0)
    {
        var letter = i == 32? "Empty Space" : String.fromCharCode(i);
        var count = firstChars[i] + secondChars[i];

        console.log(letter + " : " + count);
    }
}

Here is a jsFiddle of this code to play around with and test.

That is doing a lot of extra work for no reason. Why do you need to create two new arrays with a total of 244 placeholder elements? Why count the characters before filtering out the non-duplicates? — , Jan 25 '17 at 09:03
@TinyGiant Yea you are right, I do agree this is doing extra work, but It is pretty much a brute force method and they usually are fast. but I hear ya, this thing is a bit wasteful... "Why count the characters before filtering out the non-duplicates?"... Good point, I'm going to go with because I wasn't smart enough to do that 30 minutes ago... thanks for the tip : ) — Russell Jonakin, Jan 25 '17 at 09:29

score 0 · Answer 4 · edited Jan 25 '17 at 19:07

Though this example can probably be more elegant, I think the approach is simpler and more readable.

Here's a Pen to play around with.

function getRepeated(x, y) {
  let result = [];
  x.split('').forEach(value => {
    let array = testCharacterToString(value, y);
    if (array.length > 0) {
      result.push(array[0]);
    }
  });
  y.split('').forEach(value => {
    let array = testCharacterToString(value, x);
    if (array.length > 0) {
      result.push(array[0]);
    }
  });

  result = result.reduce(function(prev, cur) {
    prev[cur] = (prev[cur] || 0) + 1;
    return prev;
  }, {});

  return JSON.stringify(result) // {"o":4,"k":3,"e":6,"p":2,"r":2}
}

function testCharacterToString(char, string) {
  return string.split('').filter(value => {
    return value === char;
  });
}

var test = function(arg1,arg2){ return (console.log(getRepeated(arg1, arg2)),test) };

test("Leopard", "Lion")            // { "L": 2, "o": 2 }
    ("Hello", "World")             // { "l": 3, "o": 2 }
    ("Flower", "Power")            // { "o": 2, "w": 2, "e": 2, "r": 2 }
    ("phone", "Memphis")           // { "p": 2, "h": 2, "e": 2 }
    ("Bookkeeper", "Zookeeper")    // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
    ("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }

ChaudPain · Answer 5 · 2017-01-26T16:19:43.940

0

Here is a more readable version of Redu's algo...

function count(v1, v2) {
  let map = {};
  Array.prototype.forEach.call(v1, char => map[char] = map[char]++ || 1);
  Array.prototype.forEach.call(v2, char => {
    if(map[char]) map[char] += 1; 
  });
  for(let char in map) {
    if(map[char] < 2) {
      delete map[char];
    }
  }
  console.log(map)
}
count('Lion', 'Leopard');

edited Jan 26 '17 at 16:19

answered Jan 25 '17 at 20:02

ChaudPain

216
1
11

score 0 · Answer 6 · 2017-01-26T06:10:57.983

I would start off with a simple function, called countChars, to count the occurrences of characters in a single string. You can find this elsewhere on SO, but I provide a simple version. The output is an object, keyed by character, with counts as values.

We can then solve the problem of finding matching characters in two strings as a sort of merging of two objects, which we implement in a function called combineObjectsByKey.

function countChars(str) {
  return str.split('').reduce((result, chr) => {
    if (!(chr in result)) result[chr] = 0;
    result[chr]++;
    return result;
  }, {});
}

function combineObjectsByKey(o1, o2, fn) {
  const result = {};

  Object.keys(o1).forEach(key => {
    if (key in o2) result[key] = fn(o1[key], o2[key]);
  });

  return result;
}

function add(a, b) { return a + b; }

function matchingCounts(str1, str2) {
  return combineObjectsByKey(countChars(str1), countChars(str2), add);
}

console.log(matchingCounts("Leopard", "Lion"));

Alternative approach

You could also simply concatenate the strings, count characters in the concatenated string, and then remove characters not occurring in both strings:

function matchingCounts(str1, str2) {
  const counts = countChars(str1 + str2);

  Object.keys(counts).forEach(chr => {
    if (!str1.includes(chr) || !str2.includes(chr)) delete counts[chr];
  });

  return counts;
}

Calculate the frequency of common characters

6 Answers6

ES6 Example

ES5 Example

Condensed Example

Alternative approach