Here's the code I currently use to solve this problem. I have enough tests running against it to qualify it for my use case, but it's probably not 100% correct, and it certainly doesn't handle all possible query documents.
def check_doc_against_mongo_query(doc, query):
    """Decide locally whether ``doc`` would be returned by the mongo ``query``.

    Matching is normally the database's job. The motivating case is running
    many complex queries regularly just to count matches: when each of them
    would trigger a full-collection scan, it is often faster to pull the whole
    collection into memory with a single scan and evaluate every query locally.

    Only part of mongo's query syntax is supported; more is meant to be added
    as the need arises.
    """
    # All of the work happens in the recursive matcher.
    matched = _match_query(doc, query)
    return matched
def _match_query(doc, query):
    """Tell whether ``doc`` satisfies every top-level entry of ``query``."""
    # A null query is never expected here.
    assert query is not None

    # Top-level entries are ANDed together, so the first failing entry
    # decides the result.
    for field, condition in query.items():
        if field == Mongo.AND:
            # Every sub-query has to match.
            satisfied = all(_match_query(doc, sub) for sub in condition)
        elif field == Mongo.OR:
            # At least one sub-query has to match.
            satisfied = any(_match_query(doc, sub) for sub in condition)
        elif field == Mongo.COMMENT:
            # Comments never affect matching.
            satisfied = True
        else:
            # Ordinary field: look up the doc's value and compare it with
            # the query value.
            doc_value = nested_dict_get(doc, field)
            satisfied = _match_doc_and_query_value(doc_value, condition)

        if not satisfied:
            return False

    # Nothing failed, so the doc matches.
    return True
def _match_doc_and_query_value(doc_v, query_v):
    """Tell whether the doc value ``doc_v`` satisfies the query value ``query_v``.

    ``query_v`` is either a plain value (equality check) or a mapping of
    operators to operands. Every resulting comparison must hold.
    """
    # Each entry is (comparison, operand, negate); all entries are ANDed below.
    checks = []

    if not isinstance(query_v, Mapping):
        # A simple value is expected here: plain equality.
        checks.append((operator.eq, query_v, False))
    else:
        for name, operand in query_v.items():
            if name == Mongo.IN:
                # A tuple operand means "equal to any element".
                checks.append((operator.eq, tuple(operand), False))
            elif name == Mongo.NIN:
                # Same as 'in', with the outcome negated.
                checks.append((operator.eq, tuple(operand), True))
            else:
                # Remaining operators map straight onto a comparison function;
                # an unrecognised operator fails the lookup.
                comparisons = {
                    Mongo.EQ: operator.eq,
                    Mongo.GT: operator.gt,
                    Mongo.GTE: operator.ge,
                    Mongo.LT: operator.lt,
                    Mongo.LTE: operator.le,
                    Mongo.NE: operator.ne,
                }
                checks.append((comparisons[name], operand, False))

    # Evaluate the comparisons in order, stopping at the first failure.
    for compare, operand, negate in checks:
        if not _invert(_match_cmp(compare, doc_v, operand), negate):
            return False
    return True
def _invert(result, invert):
    """Return ``result`` as is, or its boolean negation when ``invert`` is truthy."""
    if invert:
        return not result
    return result
def _match_cmp(op, doc_v, v):
    """Return whether ``doc_v`` and ``v`` match under the comparison ``op``.

    ``op`` is one of the ``operator`` module comparison functions, ``doc_v`` is
    the value taken from the document and ``v`` is the query operand. If ``v``
    is a tuple, ``op`` only has to hold between ``doc_v`` and any one element.

    Nulls are compared the way mongo does it rather than the way Python does:

    * null ==/<=/>= null is true, every other operator between two nulls is
      false;
    * ne null matches any non-null doc value;
    * any other comparison against a null query value is false;
    * a null doc value compared with a non-null query value only satisfies ne.

    See https://stackoverflow.com/questions/29835829/mongodb-comparison-operators-with-null
    for details.

    Values that Python cannot order against each other (``op`` raises
    ``TypeError``) count as a mismatch instead of propagating the error.
    """

    def compare_one(candidate):
        # Compare doc_v with a single, non-tuple-expanded query value.
        if candidate is None:
            if doc_v is None:
                return op in (operator.eq, operator.ge, operator.le)
            # Non-null doc value against null: only "not equal" holds.
            return op is operator.ne
        if doc_v is None:
            # Null doc value against a non-null query value. Without this
            # guard, ordering operators would raise TypeError on Python 3.
            return op is operator.ne
        try:
            return op(doc_v, candidate)
        except TypeError:
            # Un-orderable types (e.g. str vs int) never match.
            return False

    if isinstance(v, tuple):
        # Elements are compared as-is; nested tuples are not expanded again.
        return any(compare_one(element) for element in v)
    return compare_one(v)