I had to guess what your makefile structure allows based on your example, but this should get you close:
from pyparsing import *
# Elements of the makefile are delimited by line, so skippable whitespace
# must be limited to spaces and tabs; newlines are matched explicitly.
ParserElement.setDefaultWhitespaceChars(' \t')

# End-of-line marker; suppressed so it never appears in the parsed results.
NL = LineEnd().suppress()

# Punctuation that structures the input but carries no data of its own.
EQ, COLON, LBRACK, RBRACK = map(Suppress, "=:[]")

# Names start with a letter or underscore. Underscores are accepted in the
# rest of the name as well, so a name such as "bam_index" matches in full
# instead of stopping at "bam" and silently ending the parse.
identifier = Word(alphas + '_', alphanums + '_')

# name=value, where the value is the remainder of the line. `empty` is placed
# before restOfLine so that whitespace following '=' is skipped rather than
# captured as part of the value.
symbol_assignment = Group(
    identifier("name") + EQ + empty + restOfLine("value")
)("symbol_assignment")

# References such as $global.samtools, $args.input or $path.
symbol_ref = Word("$", alphanums + "_.")
def only_column_one(s, l, t):
    """Parse action that accepts a match only when it begins in column 1.

    Args:
        s: the full string being parsed.
        l: the location in ``s`` at which the match starts.
        t: the matched tokens (not inspected).

    Raises:
        ParseException: if the match does not start in column 1.
    """
    if col(l, s) == 1:
        # Match starts at the beginning of a line: accept it unchanged.
        return
    raise ParseException(s, l, "not in column 1")
# task identifiers have to start in column 1
# (a copy is used so the column check is not attached to `identifier` itself,
# which is also used for symbol names and dependency lists)
task_identifier = identifier.copy().setParseAction(only_column_one)
# "des: <text>" - the rest of the line is stored under the results name "des"
task_description = "des:" + empty + restOfLine("des")
# "path: <text>" - the rest of the line is stored under the results name "path"
task_path = "path:" + empty + restOfLine("path")
# comma-separated list of $references
task_para_body = delimitedList(symbol_ref)
# "para: [ $a, $b, ... ]" - the list is stored under the results name "para"
task_para = "para:" + LBRACK + task_para_body("para") + RBRACK
# newlines are significant elsewhere, but the bracketed list may span several
# lines, so newlines are ignored inside this expression only
task_para.ignore(NL)
# A task is a header line "target: dep1, dep2, ..." (dependencies optional)
# followed by the optional des/path/para attribute lines; the '&' operator
# lets those attribute lines appear in any order.
task_definition = Group(task_identifier("target") + COLON +
Optional(delimitedList(identifier))("deps") + NL +
(
Optional(task_description + NL) &
Optional(task_path + NL) &
Optional(task_para + NL)
)
)("task_definition")
# The whole file: any sequence of symbol assignments, task definitions and
# blank lines.
makefile_parser = ZeroOrMore(
symbol_assignment |
task_definition |
NL
)
if __name__ == "__main__":
    # Sample input exercising symbol assignments, a task with every attribute
    # (including a multi-line para list) and a task with a dependency.
    test = """\
samtools=/path/to/samtools
picard=/path/to/picard
task1:
des: description
path: /path/to/task1
para: [$global.samtools,
$args.input,
$path
]
task2: task1
"""
    # dump out what we parsed, including results names
    # (single-argument print(...) calls produce the same output whether they
    # are run as Python 2 print statements or as the Python 3 print function)
    for element in makefile_parser.parseString(test):
        print(element.getName())
        print(element.dump())
        print("")
Prints:
symbol_assignment
['samtools', '/path/to/samtools']
- name: samtools
- value: /path/to/samtools
symbol_assignment
['picard', '/path/to/picard']
- name: picard
- value: /path/to/picard
task_definition
['task1', 'des:', 'description ', 'path:', '/path/to/task1 ', 'para:',
'$global.samtools', '$args.input', '$path']
- des: description
- para: ['$global.samtools', '$args.input', '$path']
- path: /path/to/task1
- target: task1
task_definition
['task2', 'task1']
- deps: ['task1']
- target: task2
The `dump()` output shows you what names you can use to get at the fields within the parsed elements, or to distinguish what kind of element you have. `dump()` is a handy, generic tool to output whatever pyparsing has parsed. Here is some code that is more specific to your particular parser, showing how to use the field names as either dotted object references (`element.target`, `element.deps`, `element.name`, etc.) or dict-style references (`element[key]`):
# Walk the parsed elements and report each one through its results names,
# using attribute access (element.target) and dict-style access (element[key]).
# Every line is assembled as one string and printed with a single-argument
# print(...) call, so the output is the same on Python 2 and Python 3.
for element in makefile_parser.parseString(test):
    kind = element.getName()
    if kind == 'task_definition':
        header = "TASK: %s" % (element.target,)
        if element.deps:
            # Dependencies go on the same line as the task name.
            header += " DEPS:(" + ','.join(element.deps) + ")"
        print(header)
        # Only report the attributes that were present in the input.
        for key in ('des', 'path', 'para'):
            if key in element:
                print("  %s: %s" % (key.upper(), element[key]))
    elif kind == 'symbol_assignment':
        print("SYM: %s -> %s" % (element.name, element.value))
prints:
SYM: samtools -> /path/to/samtools
SYM: picard -> /path/to/picard
TASK: task1
DES: description
PATH: /path/to/task1
PARA: ['$global.samtools', '$args.input', '$path']
TASK: task2 DEPS:(task1)