Something like this should work (or you could also use Scrapy). It makes it easy to fire off a lot of requests in parallel, provided the server can handle them as well:
# thread_map is just a wrapper around concurrent.futures.ThreadPoolExecutor with a nice tqdm progress bar!
import os

from tqdm.contrib.concurrent import thread_map, process_map  # for multi-threading / multi-processing respectively

def chunk_list(lst, size):
    for i in range(0, len(lst), size):
        yield lst[i:i + size]

for idx, my_chunk in enumerate(chunk_list(huge_list, size=2**12)):
    # which_func_to_call -> your function that makes the request and returns the parsed JSON obj, etc.
    for response in thread_map(which_func_to_call, my_chunk, max_workers=os.cpu_count() + 6):
        # do something with the response now..
        ...
    # make sure to cache each chunk's results as well (in case you have a lot of them)
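For concreteness, here is a minimal sketch of what the worker function and the per-chunk caching could look like; `fetch`, the JSON endpoint, and the `cache/` directory are all hypothetical names, not something from your setup:

import json
import os

import requests

def fetch(url):
    # hypothetical worker: GET the URL and return the parsed JSON body
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return resp.json()

os.makedirs('cache', exist_ok=True)
for idx, my_chunk in enumerate(chunk_list(huge_list, size=2**12)):
    cache_file = f'cache/chunk_{idx}.json'
    if os.path.exists(cache_file):  # chunk already done, skip it on re-runs
        continue
    results = thread_map(fetch, my_chunk, max_workers=os.cpu_count() + 6)
    with open(cache_file, 'w') as f:
        json.dump(results, f)

Caching per chunk means a crash halfway through only costs you the current chunk, not everything fetched so far.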
OR

Using the Pool from Python's multiprocessing module:
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup  # imported for parsing the pages later

base_url = 'http://quotes.toscrape.com/page/'
all_urls = list()

def generate_urls():
    # better to yield them instead if you already have the list of URLs, etc.
    for i in range(1, 11):
        all_urls.append(base_url + str(i))

def scrape(url):
    res = requests.get(url)
    print(res.status_code, res.url)

if __name__ == '__main__':  # guard needed so worker processes don't re-run this on import
    generate_urls()
    p = Pool(10)
    p.map(scrape, all_urls)
    p.close()  # close() rather than terminate(): let the workers finish cleanly
    p.join()
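Since BeautifulSoup is already imported, a slightly extended scrape could actually pull the quotes out of each page instead of just printing the status code. A sketch, assuming quotes.toscrape.com's markup where each quote text sits in a span with class "text":

def scrape(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')
    # assumption: on quotes.toscrape.com each quote lives in <span class="text">
    return [span.get_text() for span in soup.select('span.text')]

if __name__ == '__main__':
    generate_urls()
    with Pool(10) as p:  # the context manager handles close()/join() for you
        results = p.map(scrape, all_urls)
    for url, quotes in zip(all_urls, results):
        print(url, '->', len(quotes), 'quotes')

Returning the data from scrape (instead of printing inside the worker) also means p.map hands everything back to the parent process, in the same order as all_urls.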