I want to read several .text documents but got some error on the line
lyrics = "".join(f.readlines())
The error is:
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 1148: character maps to <undefined>
How can I fix it. It would be helpful if anyone fixes it.
My code function is:
def read_lyrics():
reg1 = re.compile("\.txt$")
reg2 = re.compile("([0-9]+)\.txt")
reg3 = re.compile(".*_([0-9])\.txt")
reg4 = re.compile("\[.+\]")
reg5 = re.compile("info\.txt")
lyrics_dictionary = {}
#iter all directory and load all song(txt file)
for i in os.listdir():
if os.path.isdir(i):
for path,sub,items in os.walk(i):
if any([reg1.findall(item) for item in items]):
for item in items:
if reg5.findall(item):
continue
if reg3.findall(item):
num = ["0"+reg3.findall(item)[0]]
name = "_".join(path.split("/") + num)
else:
name = "_".join(path.split("/") + reg2.findall(item))
print("The path is: ", path)
print("The item is: ", item)
with open(os.path.join(path,item),"r") as f:
print("The file path is: ", f)
lyrics = "".join(f.readlines())
lyrics = reg4.subn("",lyrics)[0]
lyrics_dictionary[name] = lyrics
return lyrics_dictionary