I'm stuck with this one: I have adopted bauble (a program on github) and part of it is meant to specify a query on an sql database. the query language is really three different languages, one of which (filtering as in an SQL query) I am rewriting.
the original author had chosen for pyparsing and I have no reasons for reviewing that choice, other than I do not know pyparsing and I have always had fun with lex and yacc... but I decided I will keep pyparsing, so I'm learning it.
I have (re)written a parser that recognizes the given query, and most grammar categories translate to classes. I suppose the parsing part is quite fine, the point where I'm stuck is where the objects I've created with pyparsing need use SQLAlchemy to query the database, in particular when I'm filtering based on attributes from joined tables.
the relevant part of the grammar, in pyparsing format:
query_expression = Forward()
identifier = Group(delimitedList(Word(alphas, alphanums+'_'),
'.')).setParseAction(IdentifierToken)
ident_expression = (
Group(identifier + binop + value).setParseAction(IdentExpressionToken)
| (
Literal('(') + query_expression + Literal(')')
).setParseAction(ParenthesisedQuery))
query_expression << infixNotation(
ident_expression,
[ (NOT_, 1, opAssoc.RIGHT, SearchNotAction),
(AND_, 2, opAssoc.LEFT, SearchAndAction),
(OR_, 2, opAssoc.LEFT, SearchOrAction) ] )
and the corresponding classes (the evaluate
method of the last two ones is what I don't yet know how to write):
class BinaryLogical(object):
## abstract base class. `name` is defined in derived classes
def __init__(self, t):
self.op = t[0][1]
self.operands = t[0][0::2] # every second object is an operand
def __repr__(self):
return "(%s %s %s)" % (self.operands[0], self.name, self.operands[1])
class SearchAndAction(BinaryLogical):
name = 'AND'
def evaluate(self, domain, session):
return self.operands[0].evaluate(domain, session).intersect_all(
map(lambda i: i.evaluate(domain, session), self.operands[1:]))
class SearchOrAction(BinaryLogical):
name = 'OR'
def evaluate(self, domain, session):
return self.operands[0].evaluate(domain, session).union_all(
map(lambda i: i.evaluate(domain, session), self.operands[1:]))
class SearchNotAction(object):
name = 'NOT'
def __init__(self, t):
self.op, self.operand = t[0]
def evaluate(self, domain, session):
return session.query(domain).except_(self.operand.evaluate(domain, session))
def __repr__(self):
return "%s %s" % (self.name, str(self.operand))
class ParenthesisedQuery(object):
def __init__(self, t):
self.query = t[1]
def __repr__(self):
return "(%s)" % self.query.__repr__()
def evaluate(self, domain, session):
return self.query.evaluate(domain, session)
class IdentifierToken(object):
def __init__(self, t):
self.value = t[0]
def __repr__(self):
return '.'.join(self.value)
def evaluate(self, domain, session):
q = session.query(domain)
if len(self.value) > 1:
q = q.join(self.value[:-1], aliased=True)
return q.subquery().c[self.value[-1]]
class IdentExpressionToken(object):
def __init__(self, t):
self.op = t[0][1]
self.operation = {'>': lambda x,y: x>y,
'<': lambda x,y: x<y,
'>=': lambda x,y: x>=y,
'<=': lambda x,y: x<=y,
'=': lambda x,y: x==y,
'!=': lambda x,y: x!=y,
}[self.op]
self.operands = t[0][0::2] # every second object is an operand
def __repr__(self):
return "(%s %s %s)" % ( self.operands[0], self.op, self.operands[1])
def evaluate(self, domain, session):
return session.query(domain).filter(self.operation(self.operands[0].evaluate(domain, session),
self.operands[1].express()))
the complete and most up-to-date code for the above snippets is here.
a couple possible queries:
results = mapper_search.search("plant where accession.species.id=44")
results = mapper_search.search("species where genus.genus='Ixora'")
results = mapper_search.search("species where genus.genus=Maxillaria and not genus.family=Orchidaceae")