Clarifications
This section contains extracts from a discussion between me and the OP (Original Poster).
First we redefined the problem like so:
ME: So you want to compare the order of common elements?
OP: If that's what it is called, yes. Keeping in mind that names can repeat in either or both arrays. We're trying to detect if there is an inversion happening, even though there may be extraneous elements.
Then we reduced the problem to comparing the order of common letters between two words:
OP: [CUT] "AB" == "ACB", "AB" == "ABC", "AB" == "CAB", "AB" !== "BCA", "AB" !== "CBA", "AB" !== "BAC"
Finally the OP posted the following suggestion:
OP: I think the simplest way is to detect those elements that don't appear in the other array and remove them. Do that in the inverse direction, then do the comparison (removing C). I just don't know how to do that.
I haven't tried yet, but I think it's quite close to the first algorithm below since it will simply ignore elements that are not in the other array.
Algorithm #1
The idea is to read the arrays from left to right removing pairs of common elements, then to check if there is still common elements in the remaining set.
Warning. Fails with "ABACD" and "BADCD".
Note 1. This counter-example is interesting since it reveals that whatever the order of the parameters given to the match
function, we miss a valid pair of arrays.
Note 2. Iterating in the opposite way works in this case, but using palindromes we can show that it does not always help, for example, "ABACDDCABA" and "BADCDDCDAB".
Note 3. If we drop the first letter of "ABACD", the algorithm gives the expected result, which suggests that a recursive approach might be appropriate.
A picture is worth a thousand words:
a = "ABA", b = "BA" => a[i] != b[j] => x = 0b000, y = 0b00
^ ^ ^ ^
i j i j
a = "ABA", b = "BA" => a[i] == b[j] => x = 0b001, y = 0b10
^ ^ ^ ^
i j i j
> | a.split("").filter(function (_, i) {
| return bit(i, x) === 0;
| })
< | ["B", "A"]
> | b.split("").filter(function (_, i) {
| return bit(i, y) === 0;
| })
< | ["B"]
If the final arrays contain common elements we would say that there is an inversion. In this case we have to swap the original arrays to compare them once more:
a = "BA", b = "ABA" => a[i] != b[j] => x = 0b00, y = 0b000
^ ^ ^ ^
i j i j
a = "BA", b = "ABA" => a[i] == b[j] => x = 0b01, y = 0b010
^ ^ ^ ^
i j i j
a = "BA", b = "ABA" => a[i] == b[j] => x = 0b11, y = 0b110
^ ^ ^ ^
i j i j
> | a.split("").filter(function (_, i) {
| return bit(i, x) === 0;
| })
< | []
> | b.split("").filter(function (_, i) {
| return bit(i, y) === 0;
| })
< | ["A"]
Considering the last result, we would say that "ABA" and "BA" match your criteria.
fails = 0;
N = !(Y = true)
// tests[3 * i] = expected result
// tests[3 * i + 1] = array A
// tests[3 * i + 2] = array B
tests = [
N, "BAA".split(""), "ABA".split(""),
N, "ABA".split(""), "BAA".split(""),
Y, "ABA".split(""), "BA+".split(""),
Y, "BA+".split(""), "ABA".split(""),
Y, "ABACD".split(""), "BADCD".split(""),
Y, ["Bob", "Jason", "Fred"], ["Bob", "Jason", "Fred"],
N, ["Bob", "Jason", "Fred"], ["Bob", "Fred", "Jason"],
Y, ["Bob", "Jason", "Fred"], ["Bob", "Jason"],
N, ["Jason", "Fred", "Bob"], ["Bob", "Jason"],
Y, ["Jason", "Bob"], ["Jason", "Sue", "Bob"],
N, ["Jason", "Sue", "Bob"], ["Jason", "Bob", "Sue"],
N, ["Sue", "Bob"], ["Jason", "Bob", "Sue"],
Y, ["Bob", "Sue"], ["Jason", "Bob", "Sue"],
N, ["Bob", "Sue", "Bob"], ["Bob", "Bob", "Sue"],
Y, ["Bob", "Sue", "Bob"], ["Bob", "Sue", "Bob"],
Y, ["Bob", "Sue", "Bob"], ["Sue", "Bob"]
];
for (i = 0; i < tests.length; i += 3) {
a = tests[i + 1];
b = tests[i + 2];
shouldMatch = tests[i];
doesMatch = match(a, b) || match(b, a);
if (shouldMatch !== doesMatch) fails++;
console.log(
shouldMatch ? "Y" : "N",
doesMatch ? "Y" : "N",
JSON.stringify(a),
JSON.stringify(b)
);
}
console.log(
"fails =", fails
);
function bit (i, n) {
return n >> i & 1;
}
function match (a, b) {
var offset = 0, x = 0, y = 0;
for (var i = 0; i < a.length; i++) {
for (var j = offset; j < b.length; j++) {
if (a[i] === b[j]) {
x += 1 << i;
y += 1 << j;
offset = j + 1;
j = b.length; // break
}
}
}
a = a.filter(function (_, i) {
return bit(i, x) === 0;
});
b = b.filter(function (_, i) {
return bit(i, y) === 0;
});
return !a.some(function (x) {
return b.some(function (y) {
return x === y;
});
});
}
Algorithm #2
Warning. The correctness of this algorithm must be verified (no counter-example so far).
The idea is to compare every combinations of N elements taken K, with K going from N to 0.
Specific case
Let's focus on the specific case where N = 3 and K = 2. To keep the code simple, I've padded the smallest word with a +
sign in order to start with words of the same length:
rmDots = w => w.replace(/\.+/g, ""); // dots remover
a = "ABA"; b = "BA+"; // words
n = 3; // words length
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
// explode `a` and `b`
x = a.split("");
y = b.split("");
// hide one letter starting
// from the rightmost one
x[n - (i + 1)] = ".";
y[n - (j + 1)] = ".";
// implode `a` and `b`
x = x.join("");
y = y.join("");
// print out
console.log(
// match ? "Yes" : "No"
rmDots(x) === rmDots(y) ? "Y" : "N",
JSON.stringify(x), JSON.stringify(y)
);
}
}
After that you have to check the pairs of matching combinations to filter the ones with no remaining common letters. For example, with "ABA" and "BAA", and with K = 2 (and N = 3, since N is the length of the strings), you get 3 pairs of matching combinations:
Y "A.A" ".AA"
Y ".BA" "BA."
Y ".BA" "B.A"
However, there is always one remaining letter in common (behind the dots), respectively, ".B."
and "B.."
, "A.."
and "..A"
, and "A.."
and ".A."
. Therefore, with K = 2, there is actually no pair of combinations matching your criteria, and you have to try again with K = 1.
Generalization
The following code snippet should help to understand the final algorithm:
function C (n, k) {
var acc = 1, i = 0;
while (++i <= k) acc *= (n - k + i) / i;
return acc;
}
function Ci (n, k, i) {
var j, c, flags = new Array(n);
for (j = 1; j <= n; j++) {
if (k > 0 && (c = C(n - j, k - 1)) > i) {
k -= 1; flags[j - 1] = true;
} else {
i -= c; flags[j - 1] = false;
}
}
return flags;
}
/* ignore this line */ (function(){for(var n=/^ */,e=">",t="<",r="!",o="+",i=Array.prototype.map,l=Array.prototype.slice,a=document.getElementsByTagName("pre"),u=0,c=arguments.length;u<c;u++)a[u].innerHTML=i.call(arguments[u],function(n){return p(n[0])+f(n[2])+s(n[1])}).join("");function p(t){var r=t.split("\n"),o=r[0].match(n)[0].length;return y(e,d(r.map(function(n){return n.slice(o)}).join("\n")))}function s(n){return n instanceof Error?y(r,g("#F00",n+"")):y(t,void 0===n?g("#999","undefined"):d(JSON.stringify(n)))}function f(n){return n.reduce(function(n,e){var t="string"!=typeof e[0],r=l.call(e).map(function(n){return"string"!=typeof n||t?JSON.stringify(n):n}).join(" ");return n+y(o,t?d(r):r)},"")}function y(n,e){return'<span style="display:block"><span style="display:inline-block">'+e.split("\n").map(function(e,t){return(0===t?n:" ")+" | "}).join("\n")+'</span><span style="display:inline-block">'+e+"</span></span>"}function g(n,e){return"<span "+('style="color:'+n+'"')+">"+e+"</span>"}function d(n){return"<code>"+n+"</code>"}}).apply(this,eval("["+function(){var n=/("|\\)/g,e=/^.*?\n|\n.*?$/g,t=Array.prototype.map,r=Array.prototype.filter,o=document.getElementsByTagName("pre");return t.call(o,function(t){return"["+r.call(t.childNodes,function(n){return 8===n.nodeType&&n.nodeValue.split("\n").length>2}).map(function(t){return["function(b,i,o){","return console.log=b[0],[","i,o,b[1]","];","}(function(f,l){","return console.log=function(){","return l.push(arguments),(","f.apply(console,arguments)",");","},[f,l];","}(console.log,[]),",t=JSON.stringify(t.nodeValue.replace(e,"")),',eval("try{',"eval(",t.replace(n,"\\$1"),")",'}catch(e){e}"))'].join("")}).join(",")+"]"}).join(",")}()+"]"));
/* ignore this line */ body{padding:1em !important}html,body{min-width:auto !important}
<link href="https://cdn.sstatic.net/Shared/stacks.css?v=58428843e325" rel="stylesheet"/>
<link href="https://cdn.sstatic.net/Sites/stackoverflow/primary.css?v=2ed743cc91af" rel="stylesheet"/>
<link href="https://cdn.sstatic.net/clc/styles/clc.min.css?v=768595a6d237" rel="stylesheet"/>
<blockquote>
<p><strong>Help.</strong> Run this snippet then press "Full page".</p>
</blockquote>
<blockquote>
<p><strong>Help.</strong> <code>></code> = "input", <code><</code> = "output", <code>+</code> = "log".</p>
</blockquote>
<p>Taking 2 things among 3:</p>
<pre>
<!--
n = 3
--><!--
k = 2
--><!--
Ci(n, k, 0) // 1st combination
--><!--
Ci(n, k, 2) // 3rd combination
-->
</pre>
<p>Enumerating combinations:</p>
<pre>
<!--
c = C(n, k) // how many combinations?
--><!--
for (i = 0; i < c; i++) {
console.log(i, Ci(n, k, i));
}
-->
</pre>
<p>Hiding 2 letters among 3:</p>
<pre>
<!--
letters = "ABC".split("")
--><!--
for (i = 0; i < c; i++) {
flags = Ci(n, k, i);
console.log(i, letters.map(function (letter, i) {
var hide = flags[i];
return hide ? "." : letter;
}).join(""), flags);
}
-->
</pre>
<p>Taking 2 letter among 3:</p>
<pre>
<!--
letters = "XYZ".split("")
--><!--
for (i = 0; i < c; i++) {
flags = Ci(n, k, i);
console.log(i, letters.filter(function (_, i) {
var take = flags[i];
return take;
}).join(""), flags);
}
-->
</pre>
Please keep in mind that this is a brute force approach. There are C(n,k) * C(n,k) possible pairs of combinations for each K (refer to Wikipedia for details about C(n,k)), therefore, the time it takes to compare the strings might grow exponentially (C(n,k)^2) regarding the size of the strings (n). In other words, big strings might exhaust your CPU...
fails = 0;
N = !(Y = true);
// tests[3 * i] = expected result
// tests[3 * i + 1] = array A
// tests[3 * i + 2] = array B
tests = [
N, "BAA".split(""), "ABA".split(""),
N, "ABA".split(""), "BAA".split(""),
Y, "ABA".split(""), "BA+".split(""),
Y, "BA+".split(""), "ABA".split(""),
Y, "ABACD".split(""), "BADCD".split(""),
Y, "ABACDDCABA".split(""), "BADCDDCDAB".split(""),
Y, ["Bob", "Jason", "Fred"], ["Bob", "Jason", "Fred"],
N, ["Bob", "Jason", "Fred"], ["Bob", "Fred", "Jason"],
Y, ["Bob", "Jason", "Fred"], ["Bob", "Jason"],
N, ["Jason", "Fred", "Bob"], ["Bob", "Jason"],
Y, ["Jason", "Bob"], ["Jason", "Sue", "Bob"],
N, ["Jason", "Sue", "Bob"], ["Jason", "Bob", "Sue"],
N, ["Sue", "Bob"], ["Jason", "Bob", "Sue"],
Y, ["Bob", "Sue"], ["Jason", "Bob", "Sue"],
N, ["Bob", "Sue", "Bob"], ["Bob", "Bob", "Sue"],
Y, ["Bob", "Sue", "Bob"], ["Bob", "Sue", "Bob"],
Y, ["Bob", "Sue", "Bob"], ["Sue", "Bob"]
];
for (i = 0; i < tests.length; i += 3) {
a = tests[i + 1];
b = tests[i + 2];
shouldMatch = tests[i];
doesMatch = match(a, b);
if (shouldMatch !== doesMatch) fails++;
console.log(
shouldMatch ? "Y" : "N",
doesMatch ? "Y" : "N",
JSON.stringify(a),
JSON.stringify(b)
);
}
console.log(
"fails =", fails
);
function C (n, k) {
var acc = 1, i = 0;
while (++i <= k) acc *= (n - k + i) / i;
return acc;
}
function Ci (n, k, i) {
var j, c, flags = new Array(n);
for (j = 1; j <= n; j++) {
if (k > 0 && (c = C(n - j, k - 1)) > i) {
k -= 1; flags[j - 1] = true;
} else {
i -= c; flags[j - 1] = false;
}
}
return flags;
}
function match (a, b) {
var n, c, i, j;
var k; // drop `k` elements
var a2, b2, a3, b3, aFlags, bFlags;
n = Math.max(a.length, b.length);
if (a.length < n) a = a.concat(
new Array(n - a.length).fill(null)
);
if (b.length < n) b = b.concat(
new Array(n - b.length).fill(null)
);
for (k = 0; k < n; k++) {
c = C(n, k);
for (i = 0; i < c; i++) {
aFlags = Ci(n, k, i);
a2 = a.filter(function (_, i) {
return !aFlags[i];
});
for (j = 0; j < c; j++) {
bFlags = Ci(n, k, j);
b2 = b.filter(function (_, i) {
return !bFlags[i];
});
// a2[i] = b2[i] (i in [0-(n-k)])
// means that we found the biggest
// sequence of common elements.
// Therefore, we can stop searching
// and check if there is at least
// one pair of common elements
// in the remaining set.
if (a2.every(function (x, i) {
return x === b2[i];
})) {
a3 = a.filter(function (_, i) {
return aFlags[i];
});
b3 = b.filter(function (_, i) {
return bFlags[i];
});
return !a3.some(function (x) {
return b3.some(function (y) {
return x === y;
});
});
}
}
}
}
return false;
}
Note that I pad the smallest array with null
values to start with arrays of the same length. It can be a problem if the arrays already contain null
values, but finding a workaround is not a big deal.