If you only want stable UTF-8 support for file reads and writes, without repeating the same `encoding` declaration everywhere, here are two solutions:
1. Patch the `io` module at runtime (a dangerous operation; use at your own risk)
import pathlib as pathlib
import tempfile
import chardet
def patchIOWithUtf8Default():
import builtins
import importlib.util
import sys
spec = importlib.util.find_spec("io")
module = importlib.util.module_from_spec(spec)
exec(compile(spec.loader.get_source(spec.name) + """
def open(*args, **kwargs):
args = list(args)
mode = kwargs.get('mode', (args + [''])[1])
if (len(args) < 4 and 'b' not in mode) or 'encoding' in kwargs:
kwargs['encoding'] = 'utf8'
elif len(args) >= 4 and args[3] is None:
args[3] = 'utf8'
return _io.open(*args, **kwargs)
""", module.__spec__.origin, "exec"), module.__dict__)
sys.modules[module.__name__] = module
builtins.open = __import__("io").open
importlib.reload(importlib.import_module("pathlib"))
def main():
    """Demonstrate what pathlib writes once the io patch is installed.

    Installs the patch, writes a mixed ASCII/Chinese sample with
    ``pathlib.Path.write_text``, then prints the encoding chardet detects in
    the written bytes and the text read back with newline translation
    disabled (so the on-disk line endings are visible).
    """
    patchIOWithUtf8Default()
    text = "Common\n常\nSense\n识\n天地玄黄"
    print("Original text:", repr(text))
    # A private temporary directory replaces the deprecated, race-prone
    # tempfile.mktemp() and guarantees the sample file is removed afterwards.
    with tempfile.TemporaryDirectory() as directory:
        filename = str(pathlib.Path(directory) / "sample.txt")
        pathlib.Path(filename).write_text(text)
        with open(filename, mode="rb") as stream:
            encoding = chardet.detect(stream.read())["encoding"]
        print("Written encoding by pathlib:", encoding)
        with open(filename, newline="", encoding=encoding) as stream:
            written = stream.read()
        print("Written text by pathlib:", repr(written))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sample output:
Original text: 'Common\n常\nSense\n识\n天地玄黄'
Written encoding by pathlib: utf-8
Written text by pathlib: 'Common\r\n常\r\nSense\r\n识\r\n天地玄黄'
2. Use a third-party library as a pathlib wrapper
https://github.com/baijifeilong/IceSpringPathLib
pip install IceSpringPathLib
import pathlib
import tempfile
import chardet
import IceSpringPathLib
# Demo: write the same text with pathlib and with IceSpringPathLib, then show
# the detected encoding and the raw text (newline translation disabled) that
# each of them left on disk.
text = "Common\n常\nSense\n识\n天地玄黄"
print("Original text:", repr(text))

# A private temporary directory replaces the deprecated, race-prone
# tempfile.mktemp() and guarantees the sample file is removed afterwards.
with tempfile.TemporaryDirectory() as directory:
    filename = str(pathlib.Path(directory) / "sample.txt")
    writers = (
        ("pathlib", pathlib.Path),
        ("IceSpringPathLib", IceSpringPathLib.Path),
    )
    for label, path_class in writers:
        path_class(filename).write_text(text)
        with open(filename, mode="rb") as stream:
            encoding = chardet.detect(stream.read())["encoding"]
        with open(filename, newline="", encoding=encoding) as stream:
            written = stream.read()
        print(f"\nWritten text by {label}:", repr(written))
        print(f"Written encoding by {label}:", encoding)
Sample output:
Original text: 'Common\n常\nSense\n识\n天地玄黄'
Written text by pathlib: 'Common\r\n常\r\nSense\r\n识\r\n天地玄黄'
Written encoding by pathlib: GB2312
Written text by IceSpringPathLib: 'Common\n常\nSense\n识\n天地玄黄'
Written encoding by IceSpringPathLib: utf-8