Here is a custom implementation of the trie algorithm in c# (http://en.wikipedia.org/wiki/Trie).
It is used to perform an indexed string via prefixes. This class has O(1) write and reads for leaf nodes. For prefix searches, the performance is O(log n), however the count of results for prefix is O(1).
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
public class StringIndex
{
private Dictionary<char, Item> _rootChars;
public StringIndex()
{
_rootChars = new Dictionary<char, Item>();
}
public void Add(string value, string data)
{
int level = 0;
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in value)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
}
else
{
currentItem = new Item() { Level = level, Letter = c };
currentChars.Add(c, currentItem);
}
currentChars = currentItem.Items;
level++;
}
if (!currentItem.Values.Contains(data))
{
currentItem.Values.Add(data);
IncrementCount(value);
}
}
private void IncrementCount(string value)
{
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in value)
{
currentItem = currentChars[c];
currentItem.Total++;
currentChars = currentItem.Items;
}
}
public void Remove(string value, string data)
{
Dictionary<char, Item> currentChars = _rootChars;
Dictionary<char, Item> parentChars = null;
Item currentItem = null;
foreach (char c in value)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
parentChars = currentChars;
currentChars = currentItem.Items;
}
else
{
return; // no matches found
}
}
if (currentItem.Values.Contains(data))
{
currentItem.Values.Remove(data);
DecrementCount(value);
if (currentItem.Total == 0)
{
parentChars.Remove(currentItem.Letter);
}
}
}
private void DecrementCount(string value)
{
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in value)
{
currentItem = currentChars[c];
currentItem.Total--;
currentChars = currentItem.Items;
}
}
public void Clear()
{
_rootChars.Clear();
}
public int GetValuesByPrefixCount(string prefix)
{
int valuescount = 0;
int level = 0;
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in prefix)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
currentChars = currentItem.Items;
}
else
{
return valuescount; // no matches found
}
level++;
}
valuescount = currentItem.Total;
return valuescount;
}
public HashSet<string> GetValuesByPrefixFlattened(string prefix)
{
var results = GetValuesByPrefix(prefix);
return new HashSet<string>(results.SelectMany(x => x));
}
public List<HashSet<string>> GetValuesByPrefix(string prefix)
{
var values = new List<HashSet<string>>();
int level = 0;
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in prefix)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
currentChars = currentItem.Items;
}
else
{
return values; // no matches found
}
level++;
}
ExtractValues(values, currentItem);
return values;
}
public void ExtractValues(List<HashSet<string>> values, Item item)
{
foreach (Item subitem in item.Items.Values)
{
ExtractValues(values, subitem);
}
values.Add(item.Values);
}
public class Item
{
public int Level { get; set; }
public char Letter { get; set; }
public int Total { get; set; }
public HashSet<string> Values { get; set; }
public Dictionary<char, Item> Items { get; set; }
public Item()
{
Values = new HashSet<string>();
Items = new Dictionary<char, Item>();
}
}
}
Here is the unit testing & example code for how to use this class.
using System;
using Microsoft.VisualStudio.TestTools.UnitTesting;
[TestClass]
public class StringIndexTest
{
[TestMethod]
public void AddAndSearchValues()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
var output = si.GetValuesByPrefixFlattened("abc");
Assert.IsTrue(output.Contains("1") && output.Contains("2") && output.Contains("3"));
}
[TestMethod]
public void RemoveAndSearch()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
si.Remove("abcdef", "1");
var output = si.GetValuesByPrefixFlattened("abc");
Assert.IsTrue(!output.Contains("1") && output.Contains("2") && output.Contains("3"));
}
[TestMethod]
public void Clear()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
si.Clear();
var output = si.GetValuesByPrefix("abc");
Assert.IsTrue(output.Count == 0);
}
[TestMethod]
public void AddAndSearchValuesCount()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
si.Remove("cdefgh", "7");
var output1 = si.GetValuesByPrefixCount("abc");
var output2 = si.GetValuesByPrefixCount("b");
var output3 = si.GetValuesByPrefixCount("bc");
var output4 = si.GetValuesByPrefixCount("ca");
Assert.IsTrue(output1 == 3 && output2 == 2 && output3 == 2 && output4 == 0);
}
}
Any suggestions on how to improve this class are welcome :)