Here is what I'm currently using to open various files that a user may have:
# Sniff the byte-order mark (BOM) at the start of the file to choose a codec.
# Four bytes are enough: the longest BOM checked here (UTF-32) is four bytes.
with open(file, 'rb') as fp:
    start_data = fp.read(4)

# The UTF-32 test must come first, because the UTF-32-LE BOM (FF FE 00 00)
# begins with the UTF-16-LE BOM (FF FE).
# NOTE: a UTF-16-LE file whose first character is U+0000 is indistinguishable
# from UTF-32-LE by BOM alone; it will be treated as UTF-32 here.
if start_data.startswith((b'\x00\x00\xfe\xff', b'\xff\xfe\x00\x00')):
    # The generic 'utf-32' codec uses the BOM to pick the byte order and
    # strips it from the decoded text, so BE and LE share one codec name.
    encoding = 'utf-32'
elif start_data.startswith((b'\xfe\xff', b'\xff\xfe')):
    # Same for 'utf-16': byte order is taken from the BOM.
    encoding = 'utf-16'
else:
    # 'utf-8-sig' decodes plain UTF-8 and additionally strips a leading
    # UTF-8 BOM (EF BB BF) when present. Plain 'utf-8' would leave U+FEFF
    # as the first character of the text.
    encoding = 'utf-8-sig'

# open the file with that encoding
with open(file, 'r', encoding=encoding) as fp:
    do_something()
Is there a better way than the above to properly open a file whose Unicode encoding (UTF-8, UTF-16, or UTF-32) is unknown?