One option is to keep a running list of the tokens that are being built up into abstract syntax trees, per the rules of the grammar. The algorithm can work in two parts:
- A token is read in and pushed to each list that is storing the AST being built up.
- The grammar rules are run against each sublist, spinning off subsequent potential matches.
Using a list of (name, pattern) rules — with nested dictionaries describing each pattern — to define the grammar:
# Grammar: a list of (nonterminal, pattern) rules.  A pattern is either a
# terminal name (a plain string) or a dict with an 'op' key ('or'/'and')
# plus 'left'/'right' sub-patterns.  This single rule encodes: E -> E E | id
grammar = [
    (
        'E',
        {
            'op': 'or',
            'left': {'op': 'and', 'left': 'E', 'right': 'E'},
            'right': 'id',
        },
    ),
]

# The input token stream: three 'id' tokens separated by whitespace.
src = 'id id id'
def matches(name, pattern, tree):
    """Try to match *pattern* against the tail of *tree*.

    Returns a 3-tuple ``(remaining, node, flag)``: the unconsumed prefix of
    *tree*, the AST node built for the match (or None), and 1/0 for
    success/failure.
    """
    failure = (tree, None, 0)
    if not tree:
        return failure
    # Terminal pattern: compare against the last node in the list.
    if isinstance(pattern, str):
        last = tree[-1]
        if last['name'] != pattern:
            return failure
        # Avoid double-wrapping a node that already carries a 'value'.
        node = last if 'value' in last else {'name': name, 'value': last}
        return tree[:-1], node, 1
    if pattern['op'] == 'or':
        # Prefer the right alternative, then fall back to the left one.
        for alternative in (pattern['right'], pattern['left']):
            attempt = matches(name, alternative, tree)
            if attempt[-1]:
                return attempt
        return failure
    # 'and': consume the right sub-pattern from the tail of the tree, then
    # the left sub-pattern from whatever remains before it.
    tail = matches(name, pattern['right'], tree)
    if tail[-1]:
        head = matches(name, pattern['left'], tail[0])
        if head[-1]:
            # NOTE: the tail match lands under the 'left' key and the head
            # match under 'right' — preserved exactly from the original.
            return head[0], {'name': name, 'left': tail[1], 'right': head[1]}, 1
    return failure
def reduce(tree):
    """Yield every single-step reduction of *tree*.

    The first yield is the "no reduction" option (the tree split into its
    prefix and final element, flagged 0); after that, one yield per grammar
    rule whose pattern matches the tail of *tree*.

    NOTE: shadows the builtin ``reduce``; kept because the name is the
    public interface used by ``get_asts``.
    """
    yield tree[:-1], tree[-1], 0
    for rule_name, rule_pattern in grammar:
        candidate = matches(rule_name, rule_pattern, tree)
        if candidate[-1]:
            yield candidate
def get_asts(stream, queue=None):
    """Yield every complete AST derivable from *stream*.

    Depth-first search: at each step either reduce the current *queue*
    (the partial parse built so far) via the grammar rules, or shift the
    next token from *stream* onto it.  A result is yielded whenever the
    stream is exhausted and the queue has collapsed to a single tree.

    stream -- list of token dicts still to be consumed
    queue  -- partial parse built so far (internal; defaults to empty)
    """
    # Sentinel instead of a mutable default argument: `queue=[]` would be
    # shared across top-level calls (classic Python pitfall).
    if queue is None:
        queue = []
    if queue:
        if not stream and len(queue) == 1:
            yield queue
        # Try every possible reduction of the current partial parse.
        for rest, node, reduced in reduce(queue):
            if reduced:
                yield from get_asts(stream, rest + [node])
    if stream:
        # Shift: move the next token from the stream onto the queue.
        yield from get_asts(stream[1:], queue + [stream[0]])
# Tokenize the source string into name-only token dicts, then print every
# parse tree found, pretty-printed as JSON.
import json

tokens = [{'name': tok} for tok in src.split()]
for ast in get_asts(tokens):
    print(json.dumps(ast, indent=4))
[
{
"name": "E",
"left": {
"name": "E",
"value": {
"name": "id"
}
},
"right": {
"name": "E",
"value": {
"name": "E",
"left": {
"name": "E",
"value": {
"name": "id"
}
},
"right": {
"name": "E",
"value": {
"name": "id"
}
}
}
}
}
]
[
{
"name": "E",
"left": {
"name": "E",
"value": {
"name": "E",
"left": {
"name": "E",
"value": {
"name": "id"
}
},
"right": {
"name": "E",
"value": {
"name": "id"
}
}
}
},
"right": {
"name": "E",
"value": {
"name": "id"
}
}
}
]