It turns out that I was able to modify dnozay's answer to the "Any yaml libraries in Python that support dumping of long strings as block literals or folded blocks?" question.
It turns out to be a bit faster than flyx's answer, though you need some additional tricks (borrowed with modification from drbild/json2yaml) to preserve the order of keys.
The major part is to use Representer.add_representer:
class maybe_literal_str(str):
    """Marker subclass of ``str`` with no added behaviour.

    It exists only so that a dedicated YAML representer can be registered
    for strings wrapped in it.
    """


class maybe_literal_unicode(unicode):
    """Marker subclass of ``unicode`` with no added behaviour.

    Counterpart of ``maybe_literal_str`` for ``unicode`` values.
    """
def change_maybe_style(representer):
    """Wrap *representer* so that multi-line strings get the ``'|'`` style.

    The returned callable takes ``(dumper, data)``, delegates to
    *representer* to build the scalar node, and then overwrites the node's
    ``style`` attribute: ``'|'`` when *data* is a string containing a
    newline, ``None`` in every other case.
    """
    def new_maybe_representer(dumper, data):
        node = representer(dumper, data)
        is_multiline = isinstance(data, basestring) and "\n" in data
        node.style = '|' if is_multiline else None
        return node

    return new_maybe_representer
from yaml.representer import SafeRepresenter

# Build on the stock string representers instead of calling
# represent_scalar directly: represent_str / represent_unicode already
# handle some corner cases.
represent_maybe_literal_str = change_maybe_style(
    SafeRepresenter.represent_str)
represent_maybe_literal_unicode = change_maybe_style(
    SafeRepresenter.represent_unicode)

# These are needed by yaml.safe_dump() with older PyYAML, so they are
# registered explicitly on yaml.SafeDumper rather than the default Dumper.
yaml.SafeDumper.add_representer(maybe_literal_str,
                                represent_maybe_literal_str)
yaml.SafeDumper.add_representer(maybe_literal_unicode,
                                represent_maybe_literal_unicode)
For it to work I had to wrap strings with one of those two classes:
def wrap_strings(arg):
    """Return *arg* wrapped in the matching ``maybe_literal_*`` class.

    A ``str`` becomes a ``maybe_literal_str`` and a ``unicode`` becomes a
    ``maybe_literal_unicode``; any other value is returned unchanged.
    """
    if isinstance(arg, str):
        return maybe_literal_str(arg)
    if isinstance(arg, unicode):
        return maybe_literal_unicode(arg)
    # not a string type: hand it back untouched
    return arg
I have used this hacky function to modify the structure in place:
def transform(obj, leaf_callback):
    """Apply *leaf_callback* to every leaf of a nested dict/list, in place.

    *obj* is walked recursively: values that are ``dict`` or ``list``
    instances are descended into, every other value is treated as a leaf
    and passed to *leaf_callback*. When the callback returns something
    other than ``None``, that result replaces the leaf inside its
    container; a ``None`` result leaves the leaf as it was.

    Returns ``None``; the structure is modified in place.
    """
    try:
        # Dict-like object: take a snapshot of its (key, value) pairs.
        # items() is available on both Python 2 and Python 3 dicts, whereas
        # iteritems() is missing on Python 3 and would make a dict fall
        # through to the list-like branch below.
        pairs = list(obj.items())
    except AttributeError:
        # Not dict-like, so treat it as a list-like object.
        pairs = enumerate(obj)

    for key, value in pairs:
        # Is the value a collection, or a scalar (leaf value)?
        if isinstance(value, (dict, list)):
            transform(value, leaf_callback)
        else:
            replacement = leaf_callback(value)
            if replacement is not None:
                obj[key] = replacement
The conversion from JSON to YAML was done with:
def convert_dom(json_file, yaml_file):
    """Load JSON from *json_file* and write it to *yaml_file* as YAML.

    After parsing, every leaf of the document is passed through
    ``wrap_strings`` (via ``transform``) and the result is written with
    ``yaml.safe_dump``.
    """
    document = json.load(json_file)
    transform(document, wrap_strings)
    dump_options = {
        'explicit_start': True,  # start with "---\n"
        'default_flow_style': False,
    }
    yaml.safe_dump(document, yaml_file, **dump_options)
# Example driver: convert 'in.json' into 'out.yaml'.
# The converter defined in this file is convert_dom; the name convert_events
# is not defined here, so calling it would raise NameError.
with open('in.json', 'r') as json_file:
    with open('out.yaml', 'w') as yaml_file:
        convert_dom(json_file, yaml_file)