I am using python3 and beautifulsoup to scrape a website but i got this error. I tried to fix this using the solutions given in other answers but none solves my problem.
# -*- coding: utf-8 -*-
import os
import locale
os.environ["PYTHONIOENCODING"] = "utf-8"
myLocale=locale.setlocale(category=locale.LC_ALL, locale="en_GB.UTF-8")
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import pandas as pd
def getrank (animeurl):
html = urlopen(animeurl)
bslink = BeautifulSoup(html.read(), 'html.parser')
rank = bslink.find('span', {'class' : 'numbers ranked'}).get_text().replace('Ranked #', '')
def spring19():
html = urlopen('https://...')
bs = BeautifulSoup(html.read(), 'html.parser')
link = []
for x in bs.find_all('a', {'class' : 'link-title'}):
link.append(x.get("href"))
ranklist = []
for x in link:
x.encode(encoding='UTF-8',errors='ignore')
ranklist.append(getrank(x))
return ranklist
spring19()
the error message is : UnicodeEncodeError: 'ascii' codec can't encode character '\u2159' in position 32: ordinal not in range(128)
The reason why this error showed up is that there are some symbols in the urls i scraped. But I still have no idea how should i fix it.
Thanks a lot!