Building off of @yazanpro's trie suggestion, it was possible to build a solution which not only can search the words but also stop additional searches where ABC needs to be omitted so there is no need to check for ABCFACE, ABCHOLE, ABCHEAD...
The logic inside the tree hydration is currently in place to format from an non-optimized list of words to construct an optimized list of words 10 to a line. Copy the output and paste back in as the new WORDS list.
The value storage for each tree node, this could be optimized to not require storage of the word. However, storing the word makes this easier to debug change IsObscene to a settable property instead of using word:
public class ObsceneValue
{
public bool IsObscene
{
get { return Word != null; }
}
public string Word { get; set; }
public char Character { get; set; }
public ObsceneValue(char character, string fullWord = null)
{
Character = character;
Word = fullWord;
}
}
The node to represent the tree structure:
public class ObsceneNode
{
public ObsceneNode(ObsceneValue value)
{
Value = value;
Children = new Dictionary<char, ObsceneNode>();
}
public ObsceneValue Value { get; set; }
public Dictionary<char, ObsceneNode> Children { get; set; }
public bool HasChild(char character)
{
return Children.ContainsKey(character);
}
public ObsceneNode SafeAddChild(ObsceneValue value)
{
if (HasChild(value.Character))
{
return GetChild(value.Character);
}
var node = new ObsceneNode(value);
Children.Add(value.Character, node);
return node;
}
public ObsceneNode GetChild(char character)
{
if (HasChild(character))
{
return Children[character];
}
return null;
}
}
The actual obscenity filter, which has debug logic to omit duplicates found in nearly every list of obscene words on the internet which would require redundant checks or in this case deeper than needed trees:
public static string[] WORDS = new[]
{
"ANY","LIST","OF","WORDS"
};
private static ObsceneNode _root;
static SmutBlocker()
{
//abbreviatedList can be omitted once an optimized list of
//terms is set to WORDS.
var abbreviatedList = new List<string>();
_root = new ObsceneNode(new ObsceneValue(default(char), null));
var iter = _root;
foreach (var word in WORDS)
{
for(var i = 0; i < word.Length; i++)
{
if (iter.Value.IsObscene)
break;
var isObscene = (word.Length - 1) == i;
iter = iter.SafeAddChild(new ObsceneValue(word[i], isObscene ? word : null));
//The below list is to capture the optimized list
if (isObscene)
abbreviatedList.Add(word.ToUpper());
}
iter = _root;
}
//Purely for fixing a non-optimized list
//Remove once list is optimized
var output = String.Empty;
for (var i = 0; i < abbreviatedList.Count(); i += 10)
{
var segment = abbreviatedList.Skip(i).Take(10);
output += "\"" + String.Join("\",\"", segment) + "\"," + Environment.NewLine;
}
}
//Finally the actual IsObscene check that loops through the tree.
public static bool IsObscene(string text)
{
var iters = new List<ObsceneNode>(text.Length);
for (var i = 0; i < text.Length; i++)
{
iters.Add(_root);
var c = text[i];
for (int j = iters.Count() - 1; j >= 0; j--)
{
if (iters[j].HasChild(c))
{
iters[j] = iters[j].GetChild(c);
if (iters[j].Value.IsObscene)
{
return true;
}
}
else
{
iters.RemoveAt(j);
}
}
}
return false;
}