I am trying to search for certain Chinese characters in a site, but it always comes up as not found. Here is my code that I have so far. Site is in Chinese
from random import randint
for _ in range(1):
value = randint(100000000, 999999999)
# print(value)
#Gets link + puts together
shop = 'https://shop'
taobao = '.taobao.com'
tempLink = 'https://shop357612815.taobao.com/'
link = shop + str(value) + taobao
#request stuff
from urllib.request import urlopen
import urllib.request
#search word list
words = ['2017', '2018', '2019', 'tide brand', 'taobao', '.00', 'palace', 'ader error',
'vlone', 'fog', 'fear of god', 'assc', 'anti', '4.', '5.', '首页']
#searcher
site = urllib.request.urlopen(link).read().decode('utf-8', errors = 'ignore')
for word in words:
if word in site:
print(word, link)
If I remove the errors = 'ignore'
part it then stops working and gives the error code:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb5 in position 267: invalid start byte