This can be solved efficiently as variations on the Aho Corasick algorithm
It is an efficient dictionary matching algorithm that locates patterns within text simultaneously in O(p + q + r)
, with p
= length of patterns, q
= length of text, r
= length of returned matches.
You may want to run two separate state machines simultaneously, and you would need to modify them so they terminate on the first match.
I took a stab at the modifications, starting with this python implementation
class AhoNode(object):
def __init__(self):
self.goto = {}
self.is_match = False
self.fail = None
def aho_create_forest(patterns):
root = AhoNode()
for path in patterns:
node = root
for symbol in path:
node = node.goto.setdefault(symbol, AhoNode())
node.is_match = True
return root
def aho_create_statemachine(patterns):
root = aho_create_forest(patterns)
queue = []
for node in root.goto.itervalues():
queue.append(node)
node.fail = root
while queue:
rnode = queue.pop(0)
for key, unode in rnode.goto.iteritems():
queue.append(unode)
fnode = rnode.fail
while fnode is not None and key not in fnode.goto:
fnode = fnode.fail
unode.fail = fnode.goto[key] if fnode else root
unode.is_match = unode.is_match or unode.fail.is_match
return root
def aho_any_match(s, root):
node = root
for i, c in enumerate(s):
while node is not None and c not in node.goto:
node = node.fail
if node is None:
node = root
continue
node = node.goto[c]
if node.out:
return True
return False
def all_any_matcher(*pattern_lists):
''' Returns an efficient matcher function that takes a string
and returns True if at least one pattern from each pattern list
is found in it.
'''
machines = [aho_create_statemachine(patterns) for patterns in pattern_lists]
def matcher(text):
return all(aho_any_match(text, m) for m in machines)
return matcher
and to use it
patterns_a = ['nice','car','by','shop']
patterns_b = ['no','thing','great']
matcher = all_any_matcher(patterns_a, patterns_b)
text_1 = 'there is a car over there'
text_2 = 'no one is in a car'
for text in (text_1, text_2):
print '%r - %s' % (text, matcher(text))
This displays
'there is a car over there' - False
'no one is in a car' - True