1

What is the fastest way to loop through each possible combination of elements in a jagged array with an unknown number of rows, and an unknown number of columns in each row?

This array...

char[][] myArray = new char[][]{
    new char[] {'A', 'B'},
    new char[] {'C', 'D'},
    new char[] {'E', 'F'}
};

...would return the combinations ACE, ACF, ADE, ADF, BCE, BCF, BDE, and BDF.

What is the fastest way using C# to accomplish this?

Aaron Thomas
  • 5,054
  • 8
  • 43
  • 89

3 Answers3

3

Here is IMO a good algorithm with a minimum allocations (avoids string concatenations):

public static class Algorithms
{
    public static IEnumerable<string> GetCombinations(this char[][] input)
    {
        var result = new char[input.Length]; 
        var indices = new int[input.Length];
        for (int pos = 0, index = 0; ;)
        {
            for (; pos < input.Length; pos++, index = 0)
            {
                indices[pos] = index;
                result[pos] = input[pos][index];
            }
            yield return new string(result);
            do
            {
                if (pos == 0) yield break;
                index = indices[--pos] + 1;
            }
            while (index >= input[pos].Length);
        }
    }
}

Usage:

char[][] myArray = new char[][]{
    new char[] {'A', 'B'},
    new char[] {'C', 'D'},
    new char[] {'E', 'F'}
};
var combinations = myArray.GetCombinations();

Basically it's an unrolled implementation of something like this

from c1 in input[0]
from c2 in input[1]
...
from cN in input[N]
select new string(new [] { c1, c2, ..., cN })

P.S If you actually need char[] type result, simply change the signature to

public static IEnumerable<char[]> GetCombinations(this char[][] input)

and remove the new string from the yield.

But note that in such case the consumer of the enumerable should be responsible of making a copy of the combination array if it needs to store it. Yielding shared internal mutating array is bad (evil) from the public API design standpoint, but perfect for internal performance scenarios.

UPDATE: Since the question is about the performance, I've made a test to compare the string versions of the above algorithm (A) with the LINQ solution from Enigmativity answer(B). I've ran it against different number of 26 letter alphabet combinations from Release built exe outside the VS, and here are the results:

A: N=2 Count=         676 Time=00:00:00.0010139 Memory=     16K
B: N=2 Count=         676 Time=00:00:00.0042598 Memory=    233K

A: N=3 Count=      17,576 Time=00:00:00.0004310 Memory=    348K
B: N=3 Count=      17,576 Time=00:00:00.0126294 Memory=  2,185K

A: N=4 Count=     456,976 Time=00:00:00.0111155 Memory=  1,496K
B: N=4 Count=     456,976 Time=00:00:00.4019500 Memory=  2,104K

A: N=5 Count=  11,881,376 Time=00:00:00.2813208 Memory=  1,995K
B: N=5 Count=  11,881,376 Time=00:00:13.4492150 Memory=  2,014K

A: N=6 Count= 308,915,776 Time=00:00:07.5473890 Memory=  2,059K
B: N=6 Count= 308,915,776 Time=00:07:37.2985051 Memory=    455K

Here is the full test code in case someone is interested:

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;

namespace Samples
{
    public static class Algorithms
    {
        public static IEnumerable<string> GetCombinationsA(this char[][] input)
        {
            var result = new char[input.Length];
            var indices = new int[input.Length];
            for (int pos = 0, index = 0; ;)
            {
                for (; pos < input.Length; pos++, index = 0)
                {
                    indices[pos] = index;
                    result[pos] = input[pos][index];
                }
                yield return new string(result);
                do
                {
                    if (pos == 0) yield break;
                    index = indices[--pos] + 1;
                }
                while (index >= input[pos].Length);
            }
        }

        public static IEnumerable<string> GetCombinationsB(this char[][] input)
        {
            Func<IEnumerable<IEnumerable<char>>, IEnumerable<IEnumerable<char>>> combine = null;
            combine = css =>
                        from c in css.First()
                        from cs in css.Skip(1).Any()
                            ? combine(css.Skip(1))
                            : new[] { Enumerable.Empty<char>() }
                        select new[] { c }.Concat(cs);
            return combine(input).Select(c => String.Join("", c));
        }
    }

    class Program
    {
        class Algorithm
        {
            public string Name;
            public Func<char[][], IEnumerable<string>> Func;
        }

        static void Main(string[] args)
        {
            Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
            Algorithm[] algorithms =
            {
                new Algorithm { Name = "A", Func = Algorithms.GetCombinationsA },
                new Algorithm { Name = "B", Func = Algorithms.GetCombinationsB },
            };
            char[][] myArray = { new char[] {'A', 'B'}, new char[] {'C', 'D'}, new char[] {'E', 'F'} };
            foreach (var algo in algorithms)
                algo.Func(myArray);
            var chars = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => (char)c).ToArray();
            for (int n = 2; n < 7; n++)
            {
                var input = Enumerable.Range(0, n).Select(_ => chars).ToArray();
                foreach (var algo in algorithms)
                    Test(algo, input);
                Console.WriteLine();
            }
            Console.WriteLine("Done.");
            Console.ReadLine();
        }

        static void Test(Algorithm algo, char[][] input)
        {
            GC.Collect(); GC.WaitForPendingFinalizers();
            GC.Collect(); GC.WaitForPendingFinalizers();
            var totalMem = GC.GetTotalMemory(false);
            var timer = Stopwatch.StartNew();
            long count = 0;
            foreach (var comb in algo.Func(input)) count++;
            timer.Stop();
            totalMem = GC.GetTotalMemory(false) - totalMem;
            Console.WriteLine($"{algo.Name}: N={input.Length} Count={count,12:n0} Time={timer.Elapsed} Memory={totalMem / 1024,7:n0}K");
        }
    }
}
Community
  • 1
  • 1
Ivan Stoev
  • 195,425
  • 15
  • 312
  • 343
1

This works pretty sweet:

Func<IEnumerable<IEnumerable<char>>, IEnumerable<IEnumerable<char>>> combine = null;
combine = css =>
            from c in css.First()
            from cs in css.Skip(1).Any()
                ? combine(css.Skip(1)) 
                : new [] { Enumerable.Empty<char>() }
            select new [] { c }.Concat(cs);

If I turn the result of running your data into a string with this:

var result =
    String.Join(
        ", ",
        combine(myArray)
            .Select(c => String.Join("", c)));

...then I get this result: ACE, ACF, ADE, ADF, BCE, BCF, BDE, BDF.

This computes very fast, but it would be interesting to know what the real world input is to see if this is fast enough.

Enigmativity
  • 113,464
  • 11
  • 89
  • 172
  • *"This computes very fast, but it would be interesting to know what the real world input is to see if this is fast enough"* Take a look at the benchmark at the end of my answer :) – Ivan Stoev Feb 19 '16 at 09:38
  • @IvanStoev - Yes, that's fair enough. The data provided in the question runs very fast. That's all that I was commenting on. And that it would be interesting to see what the real world input is to see if it still holds up. – Enigmativity Feb 19 '16 at 11:47
  • Recursive lambda is an interesting approach, but looks like the performance starts degrading significantly when adding more levels (like any recursive iterator I guess) . Would be interesting to test another LINQ-ish solution with pre-chained `SelectMany`. Anyway, just a quite interesting problem domain:) – Ivan Stoev Feb 19 '16 at 11:56
  • @IvanStoev - What I have found doing this kind of thing is that a judiciously placed `.ToArray()` can give a fairly good performance boost (or penalty). It would be worth trying a few combinations of those. – Enigmativity Feb 19 '16 at 11:59
0

Ivan's answer is awesome. I made some slight tweaks to make it more generic and remove the evil mutating array he mentioned. It does allocate slightly more memory this way.

/// <summary>
/// Returns all permutations of a jagged array.
/// Ex: {{A1,A2},{B1,B2}} would return {A1B1, A1B2, A2B1, A2B2}
/// </summary>
/// <param name="input"></param>
/// <returns></returns>
public static IEnumerable<T[]> GetCombinations<T>(IReadOnlyList<IReadOnlyList<T>> input)
{
    var result = new T[input.Count];
    var indices = new int[input.Count];
    int i1 = 0, i2 = 0;

    while (true)
    {
        while (i1 < input.Count)
        {
            indices[i1] = i2;
            result[i1] = input[i1][i2];
            i1++;
            i2 = 0;
        }

        yield return result.ToArray();

        do
        {
            if (i1 == 0) yield break;
            i2 = indices[--i1] + 1;
        }
        while (i2 >= input[i1].Count);
    }
}

Unit Tests

[Xunit.Theory]
[InlineData(new[] {"A"}, new[] {"A"})]
[InlineData(new[] {"A","B"}, new[] {"A","B"})]
[InlineData(new[] { "A,B" }, new[] { "A" }, new[] { "B" })]
[InlineData(new[] { "A1,B", "A2,B" }, new[] { "A1", "A2" }, new[] { "B" })]
[InlineData(new[] { "A,B1", "A,B2" }, new[] { "A" }, new[] { "B1", "B2" })]
[InlineData(new[] { "A1,B1", "A1,B2", "A2,B1", "A2,B2" }, new[] { "A1", "A2" }, new[] { "B1", "B2" })]
[InlineData(
    new[] { "A1,B1,C1", "A1,B1,C2", "A1,B1,C3", "A1,B2,C1", "A1,B2,C2", "A1,B2,C3" }, 
    new[] { "A1" }, new[] { "B1", "B2" }, new[] { "C1", "C2", "C3" })
]
public void GetCombinationsShouldGatherAllPermutations(string[] expectedOutput, params string[][] input)
{
    var results = ThreatInserterActor.GetCombinations(input);
    var flattened = results.Select(x => string.Join(",", x)).ToList();
    CollectionAssert.AreEquivalent(expectedOutput, flattened);
}