Is it possible to do partial string formatting with the advanced string formatting methods, similar to the string template safe_substitute()
function?
For example:
s = '{foo} {bar}'
s.format(foo='FOO') #Problem: raises KeyError 'bar'
Is it possible to do partial string formatting with the advanced string formatting methods, similar to the string template safe_substitute()
function?
For example:
s = '{foo} {bar}'
s.format(foo='FOO') #Problem: raises KeyError 'bar'
You could use the partial
function from functools
which is short, most readable and also describes the coder's intention:
from functools import partial
s = partial("{foo} {bar}".format, foo="FOO")
print s(bar="BAR")
# FOO BAR
If you know in what order you're formatting things:
s = '{foo} {{bar}}'
Use it like this:
ss = s.format(foo='FOO')
print ss
>>> 'FOO {bar}'
print ss.format(bar='BAR')
>>> 'FOO BAR'
You can't specify foo
and bar
at the same time - you have to do it sequentially.
You can trick it into partial formatting by overwriting the mapping:
import string
class FormatDict(dict):
def __missing__(self, key):
return "{" + key + "}"
s = '{foo} {bar}'
formatter = string.Formatter()
mapping = FormatDict(foo='FOO')
print(formatter.vformat(s, (), mapping))
printing
FOO {bar}
Of course this basic implementation only works correctly for basic cases.
This limitation of .format()
- the inability to do partial substitutions - has been bugging me.
After evaluating writing a custom Formatter
class as described in many answers here and even considering using third-party packages such as lazy_format, I discovered a much simpler inbuilt solution: Template strings
It provides similar functionality but also provides partial substitution thorough safe_substitute()
method. The template strings need to have a $
prefix (which feels a bit weird - but the overall solution I think is better).
import string
template = string.Template('${x} ${y}')
try:
template.substitute({'x':1}) # raises KeyError
except KeyError:
pass
# but the following raises no error
partial_str = template.safe_substitute({'x':1}) # no error
# partial_str now contains a string with partial substitution
partial_template = string.Template(partial_str)
substituted_str = partial_template.safe_substitute({'y':2}) # no error
print substituted_str # prints '12'
Formed a convenience wrapper based on this:
class StringTemplate(object):
def __init__(self, template):
self.template = string.Template(template)
self.partial_substituted_str = None
def __repr__(self):
return self.template.safe_substitute()
def format(self, *args, **kws):
self.partial_substituted_str = self.template.safe_substitute(*args, **kws)
self.template = string.Template(self.partial_substituted_str)
return self.__repr__()
>>> s = StringTemplate('${x}${y}')
>>> s
'${x}${y}'
>>> s.format(x=1)
'1${y}'
>>> s.format({'y':2})
'12'
>>> print s
12
Similarly a wrapper based on Sven's answer which uses the default string formatting:
class StringTemplate(object):
class FormatDict(dict):
def __missing__(self, key):
return "{" + key + "}"
def __init__(self, template):
self.substituted_str = template
self.formatter = string.Formatter()
def __repr__(self):
return self.substituted_str
def format(self, *args, **kwargs):
mapping = StringTemplate.FormatDict(*args, **kwargs)
self.substituted_str = self.formatter.vformat(self.substituted_str, (), mapping)
Not sure if this is ok as a quick workaround, but how about
s = '{foo} {bar}'
s.format(foo='FOO', bar='{bar}')
? :)
If you define your own Formatter
which overrides the get_value
method, you could use that to map undefined field names to whatever you wanted:
http://docs.python.org/library/string.html#string.Formatter.get_value
For instance, you could map bar
to "{bar}"
if bar
isn't in the kwargs.
However, that requires using the format()
method of your Formatter object, not the string's format()
method.
>>> 'fd:{uid}:{{topic_id}}'.format(uid=123)
'fd:123:{topic_id}'
Try this out.
Thanks to Amber's comment, I came up with this:
import string
try:
# Python 3
from _string import formatter_field_name_split
except ImportError:
formatter_field_name_split = str._formatter_field_name_split
class PartialFormatter(string.Formatter):
def get_field(self, field_name, args, kwargs):
try:
val = super(PartialFormatter, self).get_field(field_name, args, kwargs)
except (IndexError, KeyError, AttributeError):
first, _ = formatter_field_name_split(field_name)
val = '{' + field_name + '}', first
return val
All the solutions I've found seemed to have issues with more advanced spec or conversion options. @SvenMarnach's FormatPlaceholder is wonderfully clever but it doesn't work properly with coercion (e.g. {a!s:>2s}
) because it calls the __str__
method (in this example) instead of __format__
and you lose any additional formatting.
Here's what I ended up with and some of it's key features:
sformat('The {} is {}', 'answer')
'The answer is {}'
sformat('The answer to {question!r} is {answer:0.2f}', answer=42)
'The answer to {question!r} is 42.00'
sformat('The {} to {} is {:0.{p}f}', 'answer', 'everything', p=4)
'The answer to everything is {:0.4f}'
str.format
(not just a mapping){k!s}
{!r}
{k:>{size}}
{k.foo}
{k[0]}
{k!s:>{size}}
import string
class SparseFormatter(string.Formatter):
"""
A modified string formatter that handles a sparse set of format
args/kwargs.
"""
# re-implemented this method for python2/3 compatibility
def vformat(self, format_string, args, kwargs):
used_args = set()
result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
self.check_unused_args(used_args, args, kwargs)
return result
def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
auto_arg_index=0):
if recursion_depth < 0:
raise ValueError('Max string recursion exceeded')
result = []
for literal_text, field_name, format_spec, conversion in \
self.parse(format_string):
orig_field_name = field_name
# output the literal text
if literal_text:
result.append(literal_text)
# if there's a field, output it
if field_name is not None:
# this is some markup, find the object and do
# the formatting
# handle arg indexing when empty field_names are given.
if field_name == '':
if auto_arg_index is False:
raise ValueError('cannot switch from manual field '
'specification to automatic field '
'numbering')
field_name = str(auto_arg_index)
auto_arg_index += 1
elif field_name.isdigit():
if auto_arg_index:
raise ValueError('cannot switch from manual field '
'specification to automatic field '
'numbering')
# disable auto arg incrementing, if it gets
# used later on, then an exception will be raised
auto_arg_index = False
# given the field_name, find the object it references
# and the argument it came from
try:
obj, arg_used = self.get_field(field_name, args, kwargs)
except (IndexError, KeyError):
# catch issues with both arg indexing and kwarg key errors
obj = orig_field_name
if conversion:
obj += '!{}'.format(conversion)
if format_spec:
format_spec, auto_arg_index = self._vformat(
format_spec, args, kwargs, used_args,
recursion_depth, auto_arg_index=auto_arg_index)
obj += ':{}'.format(format_spec)
result.append('{' + obj + '}')
else:
used_args.add(arg_used)
# do any conversion on the resulting object
obj = self.convert_field(obj, conversion)
# expand the format spec, if needed
format_spec, auto_arg_index = self._vformat(
format_spec, args, kwargs,
used_args, recursion_depth-1,
auto_arg_index=auto_arg_index)
# format the object and append to the result
result.append(self.format_field(obj, format_spec))
return ''.join(result), auto_arg_index
def sformat(s, *args, **kwargs):
# type: (str, *Any, **Any) -> str
"""
Sparse format a string.
Parameters
----------
s : str
args : *Any
kwargs : **Any
Examples
--------
>>> sformat('The {} is {}', 'answer')
'The answer is {}'
>>> sformat('The answer to {question!r} is {answer:0.2f}', answer=42)
'The answer to {question!r} is 42.00'
>>> sformat('The {} to {} is {:0.{p}f}', 'answer', 'everything', p=4)
'The answer to everything is {:0.4f}'
Returns
-------
str
"""
return SparseFormatter().format(s, *args, **kwargs)
I discovered the issues with the various implementations after writing some tests on how I wanted this method to behave. They're below if anyone finds them insightful.
import pytest
def test_auto_indexing():
# test basic arg auto-indexing
assert sformat('{}{}', 4, 2) == '42'
assert sformat('{}{} {}', 4, 2) == '42 {}'
def test_manual_indexing():
# test basic arg indexing
assert sformat('{0}{1} is not {1} or {0}', 4, 2) == '42 is not 2 or 4'
assert sformat('{0}{1} is {3} {1} or {0}', 4, 2) == '42 is {3} 2 or 4'
def test_mixing_manualauto_fails():
# test mixing manual and auto args raises
with pytest.raises(ValueError):
assert sformat('{!r} is {0}{1}', 4, 2)
def test_kwargs():
# test basic kwarg
assert sformat('{base}{n}', base=4, n=2) == '42'
assert sformat('{base}{n}', base=4, n=2, extra='foo') == '42'
assert sformat('{base}{n} {key}', base=4, n=2) == '42 {key}'
def test_args_and_kwargs():
# test mixing args/kwargs with leftovers
assert sformat('{}{k} {v}', 4, k=2) == '42 {v}'
# test mixing with leftovers
r = sformat('{}{} is the {k} to {!r}', 4, 2, k='answer')
assert r == '42 is the answer to {!r}'
def test_coercion():
# test coercion is preserved for skipped elements
assert sformat('{!r} {k!r}', '42') == "'42' {k!r}"
def test_nesting():
# test nesting works with or with out parent keys
assert sformat('{k:>{size}}', k=42, size=3) == ' 42'
assert sformat('{k:>{size}}', size=3) == '{k:>3}'
@pytest.mark.parametrize(
('s', 'expected'),
[
('{a} {b}', '1 2.0'),
('{z} {y}', '{z} {y}'),
('{a} {a:2d} {a:04d} {y:2d} {z:04d}', '1 1 0001 {y:2d} {z:04d}'),
('{a!s} {z!s} {d!r}', '1 {z!s} {\'k\': \'v\'}'),
('{a!s:>2s} {z!s:>2s}', ' 1 {z!s:>2s}'),
('{a!s:>{a}s} {z!s:>{z}s}', '1 {z!s:>{z}s}'),
('{a.imag} {z.y}', '0 {z.y}'),
('{e[0]:03d} {z[0]:03d}', '042 {z[0]:03d}'),
],
ids=[
'normal',
'none',
'formatting',
'coercion',
'formatting+coercion',
'nesting',
'getattr',
'getitem',
]
)
def test_sformat(s, expected):
# test a bunch of random stuff
data = dict(
a=1,
b=2.0,
c='3',
d={'k': 'v'},
e=[42],
)
assert expected == sformat(s, **data)
For me this was good enough:
>>> ss = 'dfassf {} dfasfae efaef {} fds'
>>> nn = ss.format('f1', '{}')
>>> nn
'dfassf f1 dfasfae efaef {} fds'
>>> n2 = nn.format('whoa')
>>> n2
'dfassf f1 dfasfae efaef whoa fds'
My suggestion would be the following (tested with Python3.6):
class Lazymap(object):
def __init__(self, **kwargs):
self.dict = kwargs
def __getitem__(self, key):
return self.dict.get(key, "".join(["{", key, "}"]))
s = '{foo} {bar}'
s.format_map(Lazymap(bar="FOO"))
# >>> '{foo} FOO'
s.format_map(Lazymap(bar="BAR"))
# >>> '{foo} BAR'
s.format_map(Lazymap(bar="BAR", foo="FOO", baz="BAZ"))
# >>> 'FOO BAR'
Update:
An even more elegant way (subclassing dict
and overloading __missing__(self, key)
) is shown here: https://stackoverflow.com/a/17215533/333403
If you'd like to unpack a dictionary to pass arguments to format
, as in this related question, you could use the following method.
First assume the string s
is the same as in this question:
s = '{foo} {bar}'
and the values are given by the following dictionary:
replacements = {'foo': 'FOO'}
Clearly this won't work:
s.format(**replacements)
#---------------------------------------------------------------------------
#KeyError Traceback (most recent call last)
#<ipython-input-29-ef5e51de79bf> in <module>()
#----> 1 s.format(**replacements)
#
#KeyError: 'bar'
However, you could first get a set
of all of the named arguments from s
and create a dictionary that maps the argument to itself wrapped in curly braces:
from string import Formatter
args = {x[1]:'{'+x[1]+'}' for x in Formatter().parse(s)}
print(args)
#{'foo': '{foo}', 'bar': '{bar}'}
Now use the args
dictionary to fill in the missing keys in replacements
. For python 3.5+, you can do this in a single expression:
new_s = s.format(**{**args, **replacements}}
print(new_s)
#'FOO {bar}'
For older versions of python, you could call update
:
args.update(replacements)
print(s.format(**args))
#'FOO {bar}'
Here's a mildly-hacky regex-based solution. Note that this will NOT work with nested format specifiers like {foo:{width}}
, but it does fix some of the problems that other answers have.
def partial_format(s, **kwargs):
parts = re.split(r'(\{[^}]*\})', s)
for k, v in kwargs.items():
for idx, part in enumerate(parts):
if re.match(rf'\{{{k}[!:}}]', part): # Placeholder keys must always be followed by '!', ':', or the closing '}'
parts[idx] = parts[idx].format_map({k: v})
return ''.join(parts)
# >>> partial_format('{foo} {bar:1.3f}', foo='FOO')
# 'FOO {bar:1.3f}'
# >>> partial_format('{foo} {bar:1.3f}', bar=1)
# '{foo} 1.000'
There is one more way to achieve this i.e by using format
and %
to replace variables. For example:
>>> s = '{foo} %(bar)s'
>>> s = s.format(foo='my_foo')
>>> s
'my_foo %(bar)s'
>>> s % {'bar': 'my_bar'}
'my_foo my_bar'
Assuming you won't use the string until it's completely filled out, you could do something like this class:
class IncrementalFormatting:
def __init__(self, string):
self._args = []
self._kwargs = {}
self._string = string
def add(self, *args, **kwargs):
self._args.extend(args)
self._kwargs.update(kwargs)
def get(self):
return self._string.format(*self._args, **self._kwargs)
Example:
template = '#{a}:{}/{}?{c}'
message = IncrementalFormatting(template)
message.add('abc')
message.add('xyz', a=24)
message.add(c='lmno')
assert message.get() == '#24:abc/xyz?lmno'
A very ugly but the simplest solution for me is to just do:
tmpl = '{foo}, {bar}'
tmpl.replace('{bar}', 'BAR')
Out[3]: '{foo}, BAR'
This way you still can use tmpl
as regular template and perform partial formatting only when needed. I find this problem too trivial to use a overkilling solution like Mohan Raj's.
After testing the most promising solutions from here and there, I realized that none of them really met the following requirements:
str.format_map()
for the template;So, I wrote my own solution, which satisfies the above requirements. (EDIT: now the version by @SvenMarnach -- as reported in this answer -- seems to handle the corner cases I needed).
Basically, I ended up parsing the template string, finding matching nested {.*?}
groups (using a find_all()
helper function) and building the formatted string progressively and directly using str.format_map()
while catching any potential KeyError
.
def find_all(
text,
pattern,
overlap=False):
"""
Find all occurrencies of the pattern in the text.
Args:
text (str|bytes|bytearray): The input text.
pattern (str|bytes|bytearray): The pattern to find.
overlap (bool): Detect overlapping patterns.
Yields:
position (int): The position of the next finding.
"""
len_text = len(text)
offset = 1 if overlap else (len(pattern) or 1)
i = 0
while i < len_text:
i = text.find(pattern, i)
if i >= 0:
yield i
i += offset
else:
break
def matching_delimiters(
text,
l_delim,
r_delim,
including=True):
"""
Find matching delimiters in a sequence.
The delimiters are matched according to nesting level.
Args:
text (str|bytes|bytearray): The input text.
l_delim (str|bytes|bytearray): The left delimiter.
r_delim (str|bytes|bytearray): The right delimiter.
including (bool): Include delimeters.
yields:
result (tuple[int]): The matching delimiters.
"""
l_offset = len(l_delim) if including else 0
r_offset = len(r_delim) if including else 0
stack = []
l_tokens = set(find_all(text, l_delim))
r_tokens = set(find_all(text, r_delim))
positions = l_tokens.union(r_tokens)
for pos in sorted(positions):
if pos in l_tokens:
stack.append(pos + 1)
elif pos in r_tokens:
if len(stack) > 0:
prev = stack.pop()
yield (prev - l_offset, pos + r_offset, len(stack))
else:
raise ValueError(
'Found `{}` unmatched right token(s) `{}` (position: {}).'
.format(len(r_tokens) - len(l_tokens), r_delim, pos))
if len(stack) > 0:
raise ValueError(
'Found `{}` unmatched left token(s) `{}` (position: {}).'
.format(
len(l_tokens) - len(r_tokens), l_delim, stack.pop() - 1))
def safe_format_map(
text,
source):
"""
Perform safe string formatting from a mapping source.
If a value is missing from source, this is simply ignored, and no
`KeyError` is raised.
Args:
text (str): Text to format.
source (Mapping|None): The mapping to use as source.
If None, uses caller's `vars()`.
Returns:
result (str): The formatted text.
"""
stack = []
for i, j, depth in matching_delimiters(text, '{', '}'):
if depth == 0:
try:
replacing = text[i:j].format_map(source)
except KeyError:
pass
else:
stack.append((i, j, replacing))
result = ''
i, j = len(text), 0
while len(stack) > 0:
last_i = i
i, j, replacing = stack.pop()
result = replacing + text[j:last_i] + result
if i > 0:
result = text[0:i] + result
return result
(This code is also available in FlyingCircus -- DISCLAIMER: I am the main author of it.)
The usage for this code would be:
print(safe_format_map('{a} {b} {c}', dict(a=-A-)))
# -A- {b} {c}
Let's compare this to the my favourite solution (by @SvenMarnach who kindly shared his code here and there):
import string
class FormatPlaceholder:
def __init__(self, key):
self.key = key
def __format__(self, spec):
result = self.key
if spec:
result += ":" + spec
return "{" + result + "}"
def __getitem__(self, index):
self.key = "{}[{}]".format(self.key, index)
return self
def __getattr__(self, attr):
self.key = "{}.{}".format(self.key, attr)
return self
class FormatDict(dict):
def __missing__(self, key):
return FormatPlaceholder(key)
def safe_format_alt(text, source):
formatter = string.Formatter()
return formatter.vformat(text, (), FormatDict(source))
Here are a couple of tests:
test_texts = (
'{b} {f}', # simple nothing useful in source
'{a} {b}', # simple
'{a} {b} {c:5d}', # formatting
'{a} {b} {c!s}', # coercion
'{a} {b} {c!s:>{a}s}', # formatting and coercion
'{a} {b} {c:0{a}d}', # nesting
'{a} {b} {d[x]}', # dicts (existing in source)
'{a} {b} {e.index}', # class (existing in source)
'{a} {b} {f[g]}', # dict (not existing in source)
'{a} {b} {f.values}', # class (not existing in source)
)
source = dict(a=4, c=101, d=dict(x='FOO'), e=[])
and the code to make it running:
funcs = safe_format_map, safe_format_alt
n = 18
for text in test_texts:
full_source = {**dict(b='---', f=dict(g='Oh yes!')), **source}
print('{:>{n}s} : OK : '.format('str.format_map', n=n) + text.format_map(full_source))
for func in funcs:
try:
print(f'{func.__name__:>{n}s} : OK : ' + func(text, source))
except:
print(f'{func.__name__:>{n}s} : FAILED : {text}')
resulting in:
str.format_map : OK : --- {'g': 'Oh yes!'}
safe_format_map : OK : {b} {f}
safe_format_alt : OK : {b} {f}
str.format_map : OK : 4 ---
safe_format_map : OK : 4 {b}
safe_format_alt : OK : 4 {b}
str.format_map : OK : 4 --- 101
safe_format_map : OK : 4 {b} 101
safe_format_alt : OK : 4 {b} 101
str.format_map : OK : 4 --- 101
safe_format_map : OK : 4 {b} 101
safe_format_alt : OK : 4 {b} 101
str.format_map : OK : 4 --- 101
safe_format_map : OK : 4 {b} 101
safe_format_alt : OK : 4 {b} 101
str.format_map : OK : 4 --- 0101
safe_format_map : OK : 4 {b} 0101
safe_format_alt : OK : 4 {b} 0101
str.format_map : OK : 4 --- FOO
safe_format_map : OK : 4 {b} FOO
safe_format_alt : OK : 4 {b} FOO
str.format_map : OK : 4 --- <built-in method index of list object at 0x7f7a485666c8>
safe_format_map : OK : 4 {b} <built-in method index of list object at 0x7f7a485666c8>
safe_format_alt : OK : 4 {b} <built-in method index of list object at 0x7f7a485666c8>
str.format_map : OK : 4 --- Oh yes!
safe_format_map : OK : 4 {b} {f[g]}
safe_format_alt : OK : 4 {b} {f[g]}
str.format_map : OK : 4 --- <built-in method values of dict object at 0x7f7a485da090>
safe_format_map : OK : 4 {b} {f.values}
safe_format_alt : OK : 4 {b} {f.values}
as you can see, the updated version now seems to handle well the corner cases where the earlier version used to fail.
Timewise, they are within approx. 50% of each other, depending on the actual text
to format (and likely the actual source
), but safe_format_map()
seems to have an edge in most of the tests I performed (whatever they mean, of course):
for text in test_texts:
print(f' {text}')
%timeit safe_format(text * 1000, source)
%timeit safe_format_alt(text * 1000, source)
{b} {f}
3.93 ms ± 153 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
6.35 ms ± 51.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b}
4.37 ms ± 57.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.2 ms ± 159 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {c:5d}
7.15 ms ± 91.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.76 ms ± 69.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {c!s}
7.04 ms ± 138 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.56 ms ± 161 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {c!s:>{a}s}
8.91 ms ± 113 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10.5 ms ± 181 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {c:0{a}d}
8.84 ms ± 147 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10.2 ms ± 202 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {d[x]}
7.01 ms ± 197 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.35 ms ± 106 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {e.index}
11 ms ± 68.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.78 ms ± 405 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {f[g]}
6.55 ms ± 88.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9.12 ms ± 159 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
{a} {b} {f.values}
6.61 ms ± 55.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9.92 ms ± 98.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
I like @sven-marnach answer. My answer is simply an extended version of it. It allows non-keyword formatting and ignores extra keys. Here are examples of usage (the name of a function is a reference to python 3.6 f-string formatting):
# partial string substitution by keyword
>>> f('{foo} {bar}', foo="FOO")
'FOO {bar}'
# partial string substitution by argument
>>> f('{} {bar}', 1)
'1 {bar}'
>>> f('{foo} {}', 1)
'{foo} 1'
# partial string substitution with arguments and keyword mixed
>>> f('{foo} {} {bar} {}', '|', bar='BAR')
'{foo} | BAR {}'
# partial string substitution with extra keyword
>>> f('{foo} {bar}', foo="FOO", bro="BRO")
'FOO {bar}'
# you can simply 'pour out' your dictionary to format function
>>> kwargs = {'foo': 'FOO', 'bro': 'BRO'}
>>> f('{foo} {bar}', **kwargs)
'FOO {bar}'
And here is my code:
from string import Formatter
class FormatTuple(tuple):
def __getitem__(self, key):
if key + 1 > len(self):
return "{}"
return tuple.__getitem__(self, key)
class FormatDict(dict):
def __missing__(self, key):
return "{" + key + "}"
def f(string, *args, **kwargs):
"""
String safe substitute format method.
If you pass extra keys they will be ignored.
If you pass incomplete substitute map, missing keys will be left unchanged.
:param string:
:param kwargs:
:return:
>>> f('{foo} {bar}', foo="FOO")
'FOO {bar}'
>>> f('{} {bar}', 1)
'1 {bar}'
>>> f('{foo} {}', 1)
'{foo} 1'
>>> f('{foo} {} {bar} {}', '|', bar='BAR')
'{foo} | BAR {}'
>>> f('{foo} {bar}', foo="FOO", bro="BRO")
'FOO {bar}'
"""
formatter = Formatter()
args_mapping = FormatTuple(args)
mapping = FormatDict(kwargs)
return formatter.vformat(string, args_mapping, mapping)
Reading @Sam Bourne comment, I modified @SvenMarnach's code
to work properly with coercion (like {a!s:>2s}
) without writing a custom parser.
The basic idea is not to convert to strings but concatenate missing keys with coercion tags.
import string
class MissingKey(object):
def __init__(self, key):
self.key = key
def __str__(self): # Supports {key!s}
return MissingKeyStr("".join([self.key, "!s"]))
def __repr__(self): # Supports {key!r}
return MissingKeyStr("".join([self.key, "!r"]))
def __format__(self, spec): # Supports {key:spec}
if spec:
return "".join(["{", self.key, ":", spec, "}"])
return "".join(["{", self.key, "}"])
def __getitem__(self, i): # Supports {key[i]}
return MissingKey("".join([self.key, "[", str(i), "]"]))
def __getattr__(self, name): # Supports {key.name}
return MissingKey("".join([self.key, ".", name]))
class MissingKeyStr(MissingKey, str):
def __init__(self, key):
if isinstance(key, MissingKey):
self.key = "".join([key.key, "!s"])
else:
self.key = key
class SafeFormatter(string.Formatter):
def __init__(self, default=lambda k: MissingKey(k)):
self.default=default
def get_value(self, key, args, kwds):
if isinstance(key, str):
return kwds.get(key, self.default(key))
else:
return super().get_value(key, args, kwds)
Use (for example) like this
SafeFormatter().format("{a:<5} {b:<10}", a=10)
The following tests (inspired by tests from @norok2) check the output for the traditional format_map
and a safe_format_map
based on the class above in two cases: providing correct keywords or without them.
def safe_format_map(text, source):
return SafeFormatter().format(text, **source)
test_texts = (
'{a} ', # simple nothing useful in source
'{a:5d}', # formatting
'{a!s}', # coercion
'{a!s:>{a}s}', # formatting and coercion
'{a:0{a}d}', # nesting
'{d[x]}', # indexing
'{d.values}', # member
)
source = dict(a=10,d=dict(x='FOO'))
funcs = [safe_format_map,
str.format_map
#safe_format_alt # Version based on parsing (See @norok2)
]
n = 18
for text in test_texts:
# full_source = {**dict(b='---', f=dict(g='Oh yes!')), **source}
# print('{:>{n}s} : OK : '.format('str.format_map', n=n) + text.format_map(full_source))
print("Testing:", text)
for func in funcs:
try:
print(f'{func.__name__:>{n}s} : OK\t\t\t: ' + func(text, dict()))
except:
print(f'{func.__name__:>{n}s} : FAILED')
try:
print(f'{func.__name__:>{n}s} : OK\t\t\t: ' + func(text, source))
except:
print(f'{func.__name__:>{n}s} : FAILED')
Which outputs
Testing: {a}
safe_format_map : OK : {a}
safe_format_map : OK : 10
format_map : FAILED
format_map : OK : 10
Testing: {a:5d}
safe_format_map : OK : {a:5d}
safe_format_map : OK : 10
format_map : FAILED
format_map : OK : 10
Testing: {a!s}
safe_format_map : OK : {a!s}
safe_format_map : OK : 10
format_map : FAILED
format_map : OK : 10
Testing: {a!s:>{a}s}
safe_format_map : OK : {a!s:>{a}s}
safe_format_map : OK : 10
format_map : FAILED
format_map : OK : 10
Testing: {a:0{a}d}
safe_format_map : OK : {a:0{a}d}
safe_format_map : OK : 0000000010
format_map : FAILED
format_map : OK : 0000000010
Testing: {d[x]}
safe_format_map : OK : {d[x]}
safe_format_map : OK : FOO
format_map : FAILED
format_map : OK : FOO
Testing: {d.values}
safe_format_map : OK : {d.values}
safe_format_map : OK : <built-in method values of dict object at 0x7fe61e230af8>
format_map : FAILED
format_map : OK : <built-in method values of dict object at 0x7fe61e230af8>
TL;DR: Problem: defaultdict
fails for {foobar[a]}
if foobar
is not set:
from collections import defaultdict
text = "{bar}, {foo}, {foobar[a]}" # {bar} is set, {foo} is "", {foobar[a]} fails
text.format_map(defaultdict(str, bar="A")) # TypeError: string indices must be integers
Solution: Copy DefaultWrapper
class from the Edit, then:
text = "{bar}, {foo}, {foobar[a]}"
text.format_map(DefaultWrapper(bar="A")) # "A, , " (missing replaced with empty str)
# Even this works:
foobar = {"c": "C"}
text = "{foobar[a]}, {foobar[c]}"
text.format_map(DefaultWrapper(foobar=foobar)) # ", C" missing indices are also replaced
Note that indexing and attribute access does not work in one of the posted solutions. The following code raises a TypeError: string indices must be integers
.
from collections import defaultdict
text = "{foo} '{bar[index]}'"
text.format_map(defaultdict(str, foo="FOO")) # raises a TypeError
To solve this problem one can use the collections.defaultdict
solution together with a custom default value object which supports indexing. The DefaultWrapper
object returns itself on index and attribute access which allows to index/use attributes unlimited times without errors.
Note that this can be exteded to allow containers that contain parts of the requested values. Check out the Edit below.
class DefaultWrapper:
def __repr__(self):
return "Empty default value"
def __str__(self):
return ""
def __format__(self, format_spec):
return ""
def __getattr__(self, name):
return self
def __getitem__(self, name):
return self
def __contains__(self, name):
return True
text = "'{foo}', '{bar[index][i]}'"
print(text.format_map(defaultdict(DefaultWrapper, foo="FOO")))
# 'FOO', ''
The above class can be extended to support partly filled containers. So for example
text = "'{foo[a]}', '{foo[b]}'"
foo = {"a": "A"}
print(text.format_map(defaultdict(DefaultWrapper, foo=foo)))
# KeyError: 'b'
The idea is to replace the defaultdict
completely with the DefaultWrapper
. The DefaultWrapper
object wraps around the container
returning the containers requested value (wrapped with a DefaultWrapper
object) or the container as a string. This way an infinite depth of the map is immitated but all present values are returned.
The kwargs
is added for convenience only. This way it looks more like the defaultdict
solution.
class DefaultWrapper:
"""A wrapper around the `container` to allow accessing with a default value."""
ignore_str_format_errors = True
def __init__(self, container="", **kwargs):
self.container = container
self.kwargs = kwargs
def __repr__(self):
return "DefaultWrapper around '{}'".format(repr(self.container))
def __str__(self):
return str(self.container)
def __format__(self, format_spec):
try:
return self.container.__format__(format_spec)
except TypeError as e:
if DefaultWrapper.ignore_str_format_errors or self.container == "":
return str(self)
else:
raise e
def __getattr__(self, name):
try:
return DefaultWrapper(getattr(self.container, name))
except AttributeError:
return DefaultWrapper()
def __getitem__(self, name):
try:
return DefaultWrapper(self.container[name])
except (TypeError, LookupError):
try:
return DefaultWrapper(self.kwargs[name])
except (TypeError, LookupError):
return DefaultWrapper()
def __contains__(self, name):
return True
Now all the shown examples work without errors:
text = "'{foo[a]}', '{foo[b]}'"
foo = {"a": "A"}
print(text.format_map(DefaultWrapper(foo=foo)))
# 'A', ''
text = "'{foo}', '{bar[index][i]}', '{foobar[a]}', '{foobar[b]}'"
print(text.format_map(DefaultWrapper(foo="Foo", foobar={"a": "A"})))
# 'FOO', '', 'A', ''
# the old way still works the same as before
from collections import defaultdict
text = "'{foo}', '{bar[index][i]}'"
print(text.format_map(defaultdict(DefaultWrapper, foo="FOO")))
# 'FOO', ''
This is how we managed to do this:
import traceback
def grab_key_from_exc(exc):
last_line_idx = exc[:-1].rfind('\n')
last_line = exc[last_line_idx:]
quote_start = last_line.find("'")
quote_end = last_line.rfind("'")
key_name = last_line[quote_start+1:quote_end]
return key_name
def partial_format(input_string, **kwargs):
while True:
try:
return input_string.format(**kwargs)
except:
exc = traceback.format_exc()
key = grab_key_from_exc(exc)
kwargs[key] = '{'+key+'}'
The easiest and simplest solution would be to do the following:
test = "I have {statement} with a {todo} steps"
t1 = test.format(statement="{statement}",todo=" steps 1 2 3")
print(t1)
t2 = t1.format(statement="some statement")
print(t2)
Output:
I have {statement} with a steps 1 2 3 steps
I have some statement with a steps 1 2 3 steps
def partial_format(string, **kwargs):
for k, v in kwargs.items():
string = string.replace('{'+k+'}', str(v))
return string
partial_format('{foo} {bar}', foo='FOO')
>>> 'FOO {bar}'
You could wrap it in a function that takes default arguments:
def print_foo_bar(foo='', bar=''):
s = '{foo} {bar}'
return s.format(foo=foo, bar=bar)
print_foo_bar(bar='BAR') # ' BAR'