I built a trie for a dictionary lookup class. It seems to work fine except the trie is quite quite large. Seems to be about 80 MB, and from what I have read it should only be 5 MB big. I am not sure what is making the trie balloon up to 80 MB, once its loaded it runs incredible fast though.
Trie Class
public class Trie {
private TrieNode root = new TrieNode();
public const int ASCIIA = 97;
public TrieNode Insert(string word) {
char[] charArray = word.ToLower().ToCharArray();
TrieNode node = root;
foreach (char character in charArray) {
node = Insert(character, node);
}
node.IsEnd = true;
return root;
}
private TrieNode Insert(char character, TrieNode node) {
if (node.Contains(character)) {
return node.GetChild(character);
} else {
int number = System.Convert.ToByte(character) - TrieNode.ASCIIA;
TrieNode treeNode = new TrieNode();
node.nodes[number] = treeNode;
treeNode.Value = number;
return treeNode;
}
}
TrieNode Class:
public class TrieNode {
public TrieNode[] nodes;
public bool IsEnd {get; set;}
public int Value {get; set;}
public const int ASCIIA = 97;
public const int ENGL = 26;
public TrieNode() {
nodes = new TrieNode[ENGL];
}
public bool Contains(char character) {
if (character == 0)
return false;
int number = System.Convert.ToByte(character) - ASCIIA;
if (number > ENGL)
return false;
return (nodes[number] != null);
}
public bool Contains(int character) {
if (character == 0)
return false;
return (nodes[character] != null);
}
public TrieNode GetChild(char character) {
int number = System.Convert.ToByte(character) - ASCIIA;
return nodes[number];
}
public TrieNode GetChild(int character) {
return nodes[character];
}
And then to Gen the trie, using a dictionary of 170,000 words:
string[] lines = fileTXT.Split("\n"[0]);
for (int i = 0; i < data.Length;i++) {
trieDict.Insert(data[i]);
}