I am trying to make requests bases on a list of url, then check the content of the response for each. I found this snipped on SO which seemed to work well, if I output the response of the request I get the corect result, but as soon as I want to return the content it is empty... why?
import pandas as pd
import concurrent.futures
import requests
import time
out = []
CONNECTIONS = 100
TIMEOUT = 5
tlds = open('rez.txt').read().splitlines()
urls = ['http://{}'.format(x) for x in tlds[1:]]
def load_url(url, timeout):
ans = requests.head(url, timeout=timeout)
return ans
with concurrent.futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as executor:
future_to_url = (executor.submit(load_url, url, TIMEOUT) for url in urls)
time1 = time.time()
for future in concurrent.futures.as_completed(future_to_url):
try:
data = future.result()
#why is data.content empty??
print(data.content)
except Exception as exc:
data = str(type(exc))
finally:
out.append(data)
#print(out)
print(str(len(out)),end="\r")
time2 = time.time()
print(f'Took {time2-time1:.2f} s')
print(pd.Series(out).value_counts())
Output of this code:
b''
b''
b''
b''
b''
b''
b''
b''
b''
b''
b''
b''
Original thread: Getting HEAD content with Python Requests