In Summary
I am creating a tree structure out of a text file input using a function from SO question: Python file parsing: Build tree from text file. But I am able to produce my tree only by using a global variable and cannot find a way of avoiding this.
Input Data
In a file called data.txt
I have the following:
Root
-A 10
-B
--B A 2
--B B 5
--B Z 9
--B X
---X 4
---Y
----Y0 67
----Y1 32
---Z 3
-C 19
Desired Result
{'B': ['B A 2', 'B B 5', 'B Z 9', 'B X'],
'B X': ['X 4', 'Y', 'Z 3'],
'Root': ['A 10', 'B', 'C 19'],
'Y': ['Y0 67', 'Y1 32']}
My Code
import re, pprint
PATTERN = re.compile('^[-]+')
tree = {}
def _recurse_tree(parent, depth, source):
last_line = source.readline().rstrip()
while last_line:
if last_line.startswith('-'):
tabs = len( re.match(PATTERN, last_line).group() )
else:
tabs = 0
if tabs < depth:
break
node = re.sub(PATTERN, '', last_line.strip())
if tabs >= depth:
if parent is not None:
print "%s: %s" %(parent, node)
if parent in tree:
tree[parent].append(node)
else:
tree[parent] = [ node, ]
last_line = _recurse_tree(node, tabs+1, source)
return last_line
def main():
inFile = open("data.txt")
_recurse_tree(None, 0, inFile)
pprint.pprint(tree)
if __name__ == "__main__":
main()
The Problem
How do I get rid of the global variable tree
? Everything I do seems to make the code much longer or uglier but I would like to use the function heavily and I hate depending on side-effects for the core result.
Supplement
After the answers below, I revised the code to return the tree
in the following way. Is this pythonic? Returning a tuple and then tossing the first element seems inelegant.
def _recurse_tree(parent, depth, source, tree=None):
if tree is None:
tree = {}
last_line = source.readline().rstrip()
while last_line:
if last_line.startswith('-'):
tabs = len( re.match(PATTERN, last_line).group() )
else:
tabs = 0
if tabs < depth:
break
node = re.sub(PATTERN, '', last_line.strip())
if tabs >= depth:
if parent is not None:
print "%s: %s" %(parent, node)
if parent in tree:
tree[parent].append(node)
else:
tree[parent] = [ node, ]
last_line, tree = _recurse_tree(node, tabs+1, source, tree)
return last_line, tree
def main():
inFile = open("data.txt")
tmp, tree = _recurse_tree(None, 0, inFile)
pprint.pprint(tree)