I am working on a very simple "querying syntax" usable by people with reasonable technical skills (i.e., not coders per se, but able to touch on the subject)
A typical example of what they would enter on a form is:
address like street
AND
vote = True
AND
(
(
age>=25
AND
gender = M
)
OR
(
age between [20,30]
AND
gender = F
)
OR
(
age >= 70
AND
eyes != blue
)
)
With
- no quote required
- potentially infinite nesting of parentheses
- simple AND|OR linking
I am using pyparsing (well, trying to anyway) and reaching something:
from pyparsing import *
OPERATORS = [
'<',
'<=',
'>',
'>=',
'=',
'!=',
'like'
'regexp',
'between'
]
unicode_printables = u''.join(unichr(c) for c in xrange(65536)
if not unichr(c).isspace())
# user_input is the text sent by the client form
user_input = ' '.join(user_input.split())
user_input = '(' + user_input + ')'
AND = Keyword("AND").setName('AND')
OR = Keyword("OR").setName('OR')
FIELD = Word(alphanums).setName('FIELD')
OPERATOR = oneOf(OPERATORS).setName('OPERATOR')
VALUE = Word(unicode_printables).setName('VALUE')
CRITERION = FIELD + OPERATOR + VALUE
QUERY = Forward()
NESTED_PARENTHESES = nestedExpr('(', ')')
QUERY << ( CRITERION | AND | OR | NESTED_PARENTHESES )
RESULT = QUERY.parseString(user_input)
RESULT.pprint()
The output is:
[['address',
'like',
'street',
'AND',
'vote',
'=',
'True',
'AND',
[['age>=25', 'AND', 'gender', '=', 'M'],
'OR',
['age', 'between', '[20,30]', 'AND', 'gender', '=', 'F'],
'OR',
['age', '>=', '70', 'AND', 'eyes', '!=', 'blue']]]]
Which I am only partially happy with - the main reason being that the desired final output would look like this:
[
{
"field" : "address",
"operator" : "like",
"value" : "street",
},
'AND',
{
"field" : "vote",
"operator" : "=",
"value" : True,
},
'AND',
[
[
{
"field" : "age",
"operator" : ">=",
"value" : 25,
},
'AND'
{
"field" : "gender",
"operator" : "=",
"value" : "M",
}
],
'OR',
[
{
"field" : "age",
"operator" : "between",
"value" : [20,30],
},
'AND'
{
"field" : "gender",
"operator" : "=",
"value" : "F",
}
],
'OR',
[
{
"field" : "age",
"operator" : ">=",
"value" : 70,
},
'AND'
{
"field" : "eyes",
"operator" : "!=",
"value" : "blue",
}
],
]
]
Many thanks!
EDIT
After Paul's answer, this is what the code looks like. Obviously it works much more nicely :-)
unicode_printables = u''.join(unichr(c) for c in xrange(65536)
if not unichr(c).isspace())
user_input = ' '.join(user_input.split())
AND = oneOf(['AND', '&'])
OR = oneOf(['OR', '|'])
FIELD = Word(alphanums)
OPERATOR = oneOf(OPERATORS)
VALUE = Word(unicode_printables)
COMPARISON = FIELD + OPERATOR + VALUE
QUERY = infixNotation(
COMPARISON,
[
(AND, 2, opAssoc.LEFT,),
(OR, 2, opAssoc.LEFT,),
]
)
class ComparisonExpr:
def __init__(self, tokens):
self.tokens = tokens
def __str__(self):
return "Comparison:('field': {!r}, 'operator': {!r}, 'value': {!r})".format(*self.tokens.asList())
COMPARISON.addParseAction(ComparisonExpr)
RESULT = QUERY.parseString(user_input).asList()
print type(RESULT)
from pprint import pprint
pprint(RESULT)
The output is:
[
[
<[snip]ComparisonExpr instance at 0x043D0918>,
'AND',
<[snip]ComparisonExpr instance at 0x043D0F08>,
'AND',
[
[
<[snip]ComparisonExpr instance at 0x043D3878>,
'AND',
<[snip]ComparisonExpr instance at 0x043D3170>
],
'OR',
[
[
<[snip]ComparisonExpr instance at 0x043D3030>,
'AND',
<[snip]ComparisonExpr instance at 0x043D3620>
],
'AND',
[
<[snip]ComparisonExpr instance at 0x043D3210>,
'AND',
<[snip]ComparisonExpr instance at 0x043D34E0>
]
]
]
]
]
Is there a way to return RESULT with dictionaries and not ComparisonExpr
instances?
EDIT2
Came up with a naive and very specific solution, but which works for me so far:
[snip]
class ComparisonExpr:
def __init__(self, tokens):
self.tokens = tokens
def __str__(self):
return "Comparison:('field': {!r}, 'operator': {!r}, 'value': {!r})".format(*self.tokens.asList())
def asDict(self):
return {
"field": self.tokens.asList()[0],
"operator": self.tokens.asList()[1],
"value": self.tokens.asList()[2]
}
[snip]
RESULT = QUERY.parseString(user_input).asList()[0]
def convert(list):
final = []
for item in list:
if item.__class__.__name__ == 'ComparisonExpr':
final.append(item.asDict())
elif item in ['AND', 'OR']:
final.append(item)
elif item.__class__.__name__ == 'list':
final.append(convert(item))
else:
print 'ooops forgotten something maybe?'
return final
FINAL = convert(RESULT)
pprint(FINAL)
Which outputs:
[{'field': 'address', 'operator': 'LIKE', 'value': 'street'},
'AND',
{'field': 'vote', 'operator': '=', 'value': 'true'},
'AND',
[[{'field': 'age', 'operator': '>=', 'value': '25'},
'AND',
{'field': 'gender', 'operator': '=', 'value': 'M'}],
'OR',
[[{'field': 'age', 'operator': 'BETWEEN', 'value': '[20,30]'},
'AND',
{'field': 'gender', 'operator': '=', 'value': 'F'}],
'AND',
[{'field': 'age', 'operator': '>=', 'value': '70'},
'AND',
{'field': 'eyes', 'operator': '!=', 'value': 'blue'}]]]]
Again thanks to Paul for pointing me if a right direction!
The only thing unknown left is for me to turn 'true'
into True
and '[20,30]'
into [20, 30]
.