You could write a custom encode function that converts each UTF-8 character to an ASCII character specified in a lookup table.
# -*- coding: utf-8 -*-
import io
def encode_file(filepath, conversion_table=None):
    """Read a UTF-8 text file and return its contents as an ASCII-only string.

    Each non-ASCII character is replaced by its entry in *conversion_table*
    (a mapping of unicode character -> ASCII replacement string); characters
    with no entry are replaced with "?".

    :param filepath: path to the UTF-8 encoded text file to read
    :param conversion_table: optional {unicode_char: ascii_str} mapping
    :return: the file contents with every non-ASCII character replaced
    """
    # Avoid the shared mutable-default-argument pitfall.
    if conversion_table is None:
        conversion_table = {}
    # Bug fix: read the *parameter*, not the module-level `text_path` global.
    with io.open(filepath, "r", encoding="utf-8") as f:
        transcript = f.read()
    pieces = []
    for char in transcript:
        if ord(char) < 128:
            # Plain ASCII: keep as-is.
            pieces.append(char)
        else:
            # Direct dict lookup instead of scanning the whole table per
            # character; fall back to "?" when no replacement is known.
            pieces.append(conversion_table.get(char, "?"))
    # join() builds the result in one pass (repeated += is quadratic).
    return "".join(pieces)
# Example usage: point `text_path` at a real UTF-8 text file.
text_path = "/path/to/file.txt"
# Maps each non-ASCII character to the ASCII character to substitute.
conversion_table = {'ü':'u', 'ô':'o', 'é':'e', 'į':'i'}
print (encode_file(text_path, conversion_table))
For example, with a file whose contents are my ünicôdé strįng, this yields my unicode string.
So, you could add '’':'\''
(or whatever conversion) to the conversion_table
and it will do the replacement for you.