Update: I just wasn't too happy about using regexen to parse such simple input, yet I disliked the manual index-manipulation jungle found in other answers.
So I replaced the tokenizing with an Enumerator-based scanner that has two alternating token states. This is better suited to the complexity of the input and has a 'Linqy' feel to it (although it really isn't Linq). I have kept the original Regex-based parser at the end of my post for interested readers.
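For example, fed a tiny input the scanner alternates between a literal-text pocket and an alternatives pocket (a sketch that assumes the Tokenize method from the full code below):
var pockets = Tokenize("Say {hi|hello}!".GetEnumerator());
// yields three pockets, in order:
//   { "Say " }           literal text up to the first '{'
//   { "hi", "hello" }    the alternatives between the braces, split on '|'
//   { "!" }              literal text after the closing '}'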
This just had to be solved using Eric Lippert's/IanG's CartesianProduct Linq extension method, with which the core of the program becomes:
public static void Main(string[] args)
{
    const string data = @"{Hello|Hi|Hi-Hi} my {mate|m8|friend|friends}, {i|we} want to {tell|say} you {hello|hi|hi-hi}.";
    var pockets = Tokenize(data.GetEnumerator());

    foreach (var result in CartesianProduct(pockets))
        Console.WriteLine(string.Join("", result.ToArray()));
}
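With the sample input this yields 3 × 4 × 2 × 2 × 3 = 144 sentences; since later pockets vary fastest, the output should start roughly like this:
Hello my mate, i want to tell you hello.
Hello my mate, i want to tell you hi.
Hello my mate, i want to tell you hi-hi.
Hello my mate, i want to say you hello.
...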
Once the input has been parsed into 'pockets' (the scanner above replaces the original two regexen, chunks and legs), it becomes a matter of writing the CartesianProduct to the console :) Here is the full working code (.NET 3.5+):
using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Linq;
using System.Collections.Generic;

namespace X
{
    static class Y
    {
        // Reads characters until one of 'stopChars' (or the end of input) is reached,
        // hands the accumulated text to 'action', and returns true if a stop char was hit.
        private static bool ReadTill(this IEnumerator<char> input, string stopChars, Action<StringBuilder> action)
        {
            var sb = new StringBuilder();
            try
            {
                while (input.MoveNext())
                    if (stopChars.Contains(input.Current))
                        return true;
                    else
                        sb.Append(input.Current);
            }
            finally
            {
                action(sb);
            }
            return false;
        }

        // The two alternating token states: literal text (up to '{') becomes a single-element
        // pocket, the text between braces (up to '}') becomes a pocket of alternatives.
        private static IEnumerable<IEnumerable<string>> Tokenize(IEnumerator<char> input)
        {
            var result = new List<IEnumerable<string>>();

            while (input.ReadTill("{", sb => result.Add(new[] { sb.ToString() })) &&
                   input.ReadTill("}", sb => result.Add(sb.ToString().Split('|'))))
            {
                // Console.WriteLine("Expected cumulative results: " + result.Select(a => a.Count()).Aggregate(1, (i, j) => i * j));
            }

            return result;
        }

        public static void Main(string[] args)
        {
            const string data = @"{Hello|Hi|Hi-Hi} my {mate|m8|friend|friends}, {i|we} want to {tell|say} you {hello|hi|hi-hi}.";
            var pockets = Tokenize(data.GetEnumerator());

            foreach (var result in CartesianProduct(pockets))
                Console.WriteLine(string.Join("", result.ToArray()));
        }

        // Eric Lippert's / IanG's Cartesian product: every partial result in the accumulator
        // is extended with every item of the next sequence, starting from one empty sequence.
        static IEnumerable<IEnumerable<T>> CartesianProduct<T>(this IEnumerable<IEnumerable<T>> sequences)
        {
            IEnumerable<IEnumerable<T>> emptyProduct = new[] { Enumerable.Empty<T>() };
            return sequences.Aggregate(
                emptyProduct,
                (accumulator, sequence) =>
                    from accseq in accumulator
                    from item in sequence
                    select accseq.Concat(new[] { item }));
        }
    }
}
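To see what the Aggregate inside CartesianProduct does, here is a tiny sanity check (a hypothetical snippet, not part of the program above; since the method has no access modifier it has to be called from inside the same class):
var tiny = new List<IEnumerable<string>> { new[] { "a", "b" }, new[] { "1", "2" } };
foreach (var combo in CartesianProduct(tiny))
    Console.WriteLine(string.Join("+", combo.ToArray()));
// prints a+1, a+2, b+1 and b+2: each accumulated prefix is paired
// with every item of the next sequence.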
Old Regex-based parsing:
static readonly Regex chunks = new Regex(@"^(?<chunk>{.*?}|.*?(?={|$))+$", RegexOptions.Compiled);
static readonly Regex legs = new Regex(@"^{((?<alternative>.*?)[\|}])+(?<=})$", RegexOptions.Compiled);

// Returns all captures of 'group' when the regex matches; otherwise returns the text itself.
private static IEnumerable<String> All(this Regex regex, string text, string group)
{
    return !regex.IsMatch(text)
        ? new[] { text }
        : regex.Match(text).Groups[group].Captures.Cast<Capture>().Select(c => c.Value);
}

public static void Main(string[] args)
{
    const string data = @"{Hello|Hi|Hi-Hi} my {mate|m8|friend|friends}, {i|we} want to {tell|say} you {hello|hi|hi-hi}.";
    var pockets = chunks.All(data, "chunk").Select(v => legs.All(v, "alternative"));
The rest is unchanged.
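For completeness, a small hypothetical walk-through of the two regexes (assuming the fields and the All helper above are in scope):
var pockets = chunks.All("Say {hi|hello}!", "chunk").Select(v => legs.All(v, "alternative"));
// chunks splits the template into { "Say ", "{hi|hello}", "!" } (possibly plus a harmless
// empty trailing capture), and legs turns "{hi|hello}" into { "hi", "hello" } while leaving
// the plain-text chunks untouched, so 'pockets' has the same shape as the scanner version.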