No, your eval()
implementation is not safe. Other objects can give access to the __builtins__
mapping too.
See eval is dangerous by Ned Batchelder for some examples.
For example, the following string can cause a segfault:
# Expression source that the surrounding text says can segfault the
# interpreter when evaluated. It needs no builtins: it looks up the classes
# named "function" and "code" among the subclasses of the tuple type's first
# base class, builds a code object whose bytecode is the string "KABOOM",
# wraps that in a function object and calls it.
s = """
(lambda fc=(
lambda n: [
c for c in
().__class__.__bases__[0].__subclasses__()
if c.__name__ == n
][0]
):
fc("function")(
fc("code")(
0,0,0,0,"KABOOM",(),(),(),"","",0,""
),{}
)()
)()
"""
Merely disallowing double underscores is not going to be enough, I fear. Some enterprising soul will find a method of re-combining double underscores in a creative way you didn't anticipate and you'll find yourself hacked anyway.
The ast.literal_eval()
function uses AST parsing and custom handling of the parse tree to support built-in types. You could do the same by adding support for 'safe' callables:
import ast
def literal_eval_with_callables(node_or_string, safe_callables=None):
    """Safely evaluate a literal expression that may call whitelisted callables.

    Works like ``ast.literal_eval()``: the input is parsed to an AST and only
    an explicit set of node types is interpreted; nothing is ever passed to
    ``eval()``. In addition to literals, a call such as ``Name(arg, kw=arg)``
    is evaluated when ``Name`` is a key of *safe_callables*.

    Supported syntax: strings, bytes, numbers, ``None``/``True``/``False``,
    tuples, lists, dicts, unary ``+``/``-`` on numbers, ``number +/- complex
    literal`` and calls to registered callables with literal arguments.

    Args:
        node_or_string: Expression source text, an ``ast.Expression`` or an
            already-extracted expression node.
        safe_callables: Optional mapping of allowed function names to the
            callables invoked for them. Defaults to no callables.

    Returns:
        The Python object described by the expression.

    Raises:
        ValueError: If the expression contains anything outside the
            whitelist above (message ``'malformed string'``).
        SyntaxError: If *node_or_string* is a string that does not parse.
    """
    import sys  # local import: only needed for the pre-3.4 fallback below

    if safe_callables is None:
        safe_callables = {}
    if isinstance(node_or_string, str):
        node_or_string = ast.parse(node_or_string, mode='eval')
    if isinstance(node_or_string, ast.Expression):
        node_or_string = node_or_string.body

    # Python 3.8+ parsers emit ast.Constant for every literal. The older
    # Num/Str/Bytes/NameConstant classes are matched by class *name* further
    # down, because touching those attributes on the ast module warns on
    # Python 3.12+ and fails on versions where they have been removed.
    constant_cls = getattr(ast, 'Constant', ())
    literal_types = (str, bytes, int, float, complex, bool, type(None))
    number_types = (int, float, complex)  # bool deliberately excluded
    legacy_names = {'None': None, 'True': True, 'False': False}
    # Before Python 3.4, None/True/False were parsed as plain ast.Name nodes.
    names_are_constants = sys.version_info < (3, 4)
    no_literal = object()  # sentinel, since None is itself a valid literal

    def _literal(node):
        """Return the value of a literal node, or ``no_literal``."""
        if isinstance(node, constant_cls):
            value = node.value
            # Rejects Ellipsis and anything else outside the whitelist.
            return value if type(value) in literal_types else no_literal
        kind = type(node).__name__
        if kind == 'Num':
            return node.n
        if kind in ('Str', 'Bytes'):
            return node.s
        if kind == 'NameConstant':
            return node.value
        if names_are_constants and kind == 'Name' and node.id in legacy_names:
            return legacy_names[node.id]
        return no_literal

    def _is_number_node(node):
        """Tell whether *node* may be used as an arithmetic operand."""
        return (isinstance(node, (ast.UnaryOp, ast.BinOp))
                or type(_literal(node)) in number_types)

    def _convert(node):
        """Recursively turn a whitelisted AST node into a Python object."""
        value = _literal(node)
        if value is not no_literal:
            return value
        if isinstance(node, ast.Tuple):
            return tuple(map(_convert, node.elts))
        if isinstance(node, ast.List):
            return list(map(_convert, node.elts))
        if isinstance(node, ast.Dict):
            return dict((_convert(k), _convert(v))
                        for k, v in zip(node.keys, node.values))
        if (isinstance(node, ast.UnaryOp)
                and isinstance(node.op, (ast.UAdd, ast.USub))
                and _is_number_node(node.operand)):
            operand = _convert(node.operand)
            return +operand if isinstance(node.op, ast.UAdd) else -operand
        # Only "<number> +/- <complex literal>" is allowed, which is what is
        # needed to spell complex values such as 1+2j; general arithmetic
        # like 1 + 2 stays rejected.
        if (isinstance(node, ast.BinOp)
                and isinstance(node.op, (ast.Add, ast.Sub))
                and type(_literal(node.right)) is complex
                and _is_number_node(node.left)):
            left = _convert(node.left)
            right = _convert(node.right)
            return left + right if isinstance(node.op, ast.Add) else left - right
        if (isinstance(node, ast.Call)
                and isinstance(node.func, ast.Name)
                and node.func.id in safe_callables):
            # f(*seq) fails in _convert (Starred node); f(**mapping) shows up
            # as a keyword whose arg is None. Pre-3.5 ASTs carry both in the
            # starargs/kwargs attributes instead. Reject all of these.
            if (getattr(node, 'starargs', None) is not None
                    or getattr(node, 'kwargs', None) is not None
                    or any(kw.arg is None for kw in node.keywords)):
                raise ValueError('malformed string')
            args = [_convert(arg) for arg in node.args]
            kwargs = dict((kw.arg, _convert(kw.value)) for kw in node.keywords)
            return safe_callables[node.func.id](*args, **kwargs)
        raise ValueError('malformed string')

    return _convert(node_or_string)
The above function adapts the implementation of ast.literal_eval()
to add support for specific registered callables. Pass in a dictionary that maps each allowed name to the callable it should invoke, for example Fraction
:
>>> import fractions
>>> safe_callables = {'Fraction': fractions.Fraction}
>>> literal_eval_with_callables('Fraction(1, denominator=2)', safe_callables)
Fraction(1, 2)
The above method whitelists rather than blacklists what will be handled. Calling Fraction
is allowed, for example, and you control exactly which object will be called.
Demo:
>>> samples = '''\
... 2, -9, -35
... 4, -13, 3
... -6, 13, 5
... 5, -6, 6
... -2, 5, -3
... 4, 12, 9
... 0.5, 1, 1
... 1, -0.5, -0.5
... 0.25, Fraction(-1, 3), Fraction(1, 9)'''.splitlines()
>>> safe_callables = {'Fraction': fractions.Fraction}
>>> for line in samples:
... print(literal_eval_with_callables(line, safe_callables))
...
(2, -9, -35)
(4, -13, 3)
(-6, 13, 5)
(5, -6, 6)
(-2, 5, -3)
(4, 12, 9)
(0.5, 1, 1)
(1, -0.5, -0.5)
(0.25, Fraction(-1, 3), Fraction(1, 9))
The above should work on at least Python 3.3 and up, possibly earlier. Python 2 would require some more work to support unicode
strings and not break on ast.Bytes
.