in this part of code , I fetch alot of URLs from stored URLs in (url.txt) file but when the code face non-English URL, it broke and give error,
any simple code to fix this problem ? Thanks
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq
page_url = "url.txt"
with open("url.txt", "r") as fr:
for url in map(lambda x: x.strip(), fr.readlines()):
print(url)
uClient = uReq(url)
page_soup = soup(uClient.read(), "html.parser")
# the rest logic
Full error message
E:\Desktop>question.py
Traceback (most recent call last):
File "E:\Desktop\question.py", line 12, in <module>
uClient = uReq(url)
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\urllib\req
uest.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\urllib\req
uest.py", line 525, in open
response = self._open(req, data)
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\urllib\req
uest.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\urllib\req
uest.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\urllib\req
uest.py", line 1362, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\urllib\req
uest.py", line 1319, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\http\clien
t.py", line 1230, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\http\clien
t.py", line 1241, in _send_request
self.putrequest(method, url, **skips)
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\http\clien
t.py", line 1096, in putrequest
self._output(self._encode_request(request))
File "C:\Users\A-Data\AppData\Local\Programs\Python\Python38-32\lib\http\clien
t.py", line 1176, in _encode_request
return request.encode('ascii')
UnicodeEncodeError: 'ascii' codec can't encode characters in position 15-18: ord
inal not in range(128)