I have a problem converting text to CSV after scraping. The page contains French letters, which end up in the .csv file as "é", "É" and so on. How can I decode them so that they appear, for example, as English letters, or so that they are written to the file properly?
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS
from urllib import request
import pandas as pd
import os
import re
# Scrape the article title and the "Known for" infobox entries from a
# Wikipedia page and write them to files.csv without garbling accented
# characters.

# Fetch the page; the context manager closes the HTTP response when done.
with request.urlopen("https://en.wikipedia.org/wiki/Jean_Dieudonn%C3%A9") as html:
    # Use the charset announced in the Content-Type header (falling back to
    # UTF-8) so the parser does not have to guess the encoding from raw bytes.
    charset = html.headers.get_content_charset() or 'utf-8'
    bs = BS(html.read(), 'html.parser', from_encoding=charset)

# Article title: text of the first <h1>, or '' when the page has none.
heading = bs.find('h1')
name = heading.text if heading is not None else ''

# Locate the header cell whose text contains "Known". A plain text test is
# used instead of the deprecated `th:contains(...)` CSS selector.
known_header = next(
    (th for th in bs.find_all('th') if 'Known' in th.get_text()),
    None,
)
# find_next_sibling('td') skips whitespace text nodes that a bare
# `.next_sibling` could return instead of the value cell.
known_cell = (
    known_header.find_next_sibling('td') if known_header is not None else None
)
if known_cell is not None:
    # One entry per text fragment; blank fragments are dropped and the rest
    # joined into a single string so the CSV cell holds readable text rather
    # than the repr of a Python list.
    entries = [
        part.strip()
        for part in known_cell.get_text('\n').split('\n')
        if part.strip()
    ]
    known = '; '.join(entries)
else:
    known = ''

# Build the one-row frame directly; DataFrame.append was removed in pandas 2.0.
x = {'name': name, 'known for': known}
data = pd.DataFrame([x], columns=['name', 'known for'])

# 'utf-8-sig' prepends a byte-order mark, which lets spreadsheet programs such
# as Excel recognise the file as UTF-8 instead of decoding it with a legacy
# code page (the cause of the mangled accented letters).
data.to_csv('files.csv', sep=",", index=True, encoding='utf-8-sig')
Thanks for any ideas