import time
import urllib.request

def fetch_html(url):
    # Not a good idea to assume it's UTF-8; should try to read the header instead
    try:
        fp = urllib.request.urlopen(url)
        fpbytes = fp.read()
        html = fpbytes.decode("utf8")
        fp.close()
        print("Success! {} chars found".format(len(html)))
        return html
    except:
        print("Failed to extract html, retrying again in a few seconds")
        time.sleep(3.5)
        fetch_html(url)
url = "https://i.reddit.com/r/AskReddit/top/.compact?sort=top&t=day"
html = fetch_html(url)
print(html)
html is still None even though len(html) prints as 70000 inside the function. What gives? I tried switching the order, placing fp.close() after return html, but I still get the same result.
I searched Google for this, but the questions I found came from people not using return at all, which is different from this question.
SOLVED: https://gist.github.com/carcigenicate/ff1523fa66602a1c47b7c5ae4d6f1e92 - the except branch called fetch_html(url) again after a failure but discarded its result, so the outer call fell off the end of the function and implicitly returned None. Replacing the recursion with a retry loop fixes it:
def fetch_html(url):
    while True:
        try:
            fp = urllib.request.urlopen(url)
            fpbytes = fp.read()
            html = fpbytes.decode("utf8")
            fp.close()
            print("Success! {} chars found".format(len(html)))
            return html
        except:
            print("Failed to extract html, retrying again in a few seconds")
            time.sleep(3.5)
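For reference, the same bug can also be fixed with a one-line change by returning the result of the recursive call. Below is a minimal sketch of that variant, with two tweaks that are not part of the gist: it reads the charset from the Content-Type header (as the comment in the original code suggests) instead of hard-coding UTF-8, and it catches urllib.error.URLError rather than a bare except.

import time
import urllib.error
import urllib.request

def fetch_html(url):
    try:
        fp = urllib.request.urlopen(url)
        fpbytes = fp.read()
        # Use the charset the server declares, falling back to UTF-8 if it declares none
        charset = fp.headers.get_content_charset() or "utf-8"
        html = fpbytes.decode(charset)
        fp.close()
        print("Success! {} chars found".format(len(html)))
        return html
    except urllib.error.URLError:
        print("Failed to extract html, retrying again in a few seconds")
        time.sleep(3.5)
        return fetch_html(url)  # return the retry's result, otherwise the caller gets None

The while True version above is still the better choice: every failed attempt here adds a stack frame, so a long enough outage would eventually hit Python's recursion limit.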