from bs4 import BeautifulSoup
from urllib.request import urlopen
response = urlopen('https://tieba.baidu.com/f?kw=比特币&ie=utf-8&tab=good')
soup = BeautifulSoup(response, 'html.parser', 'utf-8')
for anchor in soup.select('.j_th_tit'):
print(anchor.get_text())
Here is the error message.
Traceback (most recent call last):
File "index.py", line 4, in <module>
response = urlopen('https://tieba.baidu.com/f?kw=比特币&ie=utf-8&tab=good')
File "/usr/local/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/usr/local/lib/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/usr/local/lib/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/usr/local/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/usr/local/lib/python3.7/urllib/request.py", line 1360, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/usr/local/lib/python3.7/urllib/request.py", line 1317, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/usr/local/lib/python3.7/http/client.py", line 1244, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/local/lib/python3.7/http/client.py", line 1255, in _send_request
self.putrequest(method, url, **skips)
File "/usr/local/lib/python3.7/http/client.py", line 1122, in putrequest
self._output(request.encode('ascii'))
UnicodeEncodeError: 'ascii' codec can't encode characters in position 10-12: ordinal not in range(128)
I want to crawl some texts from Baidu but it didn't work well. I guess that the problem occurs while processing Chinese letters. How can I solve this problem?