Running on Windows, this code has worked for a while, but crashes with a ValueError: too many file descriptors in select()
if workload is too high.
import aiohttp
import asyncio
class Multiple_HTTP:
#----------------------------------------------------------------------------------
@staticmethod
async def fetch(session,url):
try:
async with session.get(url) as response:
status = response.status
text = await response.text()
return (url,status,text)
except Exception as e:
return e
#----------------------------------------------------------------------------------
@staticmethod
async def fetch_all(urls,timeout):
loop = asyncio.get_event_loop()
session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout,sock_read=timeout)
async with aiohttp.ClientSession(loop=loop , timeout=session_timeout , connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
cwlist = [loop.create_task(Multiple_HTTP.fetch(session,url)) for url in urls]
results = []
results = await asyncio.gather(*cwlist,return_exceptions=True)
return results
#----------------------------------------------------------------------------------
@staticmethod
def run(urls,timeout=5):
return asyncio.run(Multiple_HTTP.fetch_all(urls,timeout))
#----------------------------------------------------------------------------------
Based on this other question (Python asyncio/aiohttp: ValueError: too many file descriptors in select() on Windows), I modified to use a ProactorEventLoop and an asyncio.Semaphore, such as:
import aiohttp
import asyncio
import sys
class Multiple_HTTP:
#----------------------------------------------------------------------------------
@staticmethod
async def fetch(session,url,semaphore):
try:
async with semaphore , session.get(url) as response:
status = response.status
text = await response.text()
return (url,status,text)
except Exception as e:
return e
#----------------------------------------------------------------------------------
@staticmethod
async def fetch_all(urls,timeout,N):
semaphore = asyncio.Semaphore(N)
loop = None
tcp_limit = N
if(sys.platform=='win32'):
tcp_limit = 60
loop = asyncio.ProactorEventLoop()
asyncio.set_event_loop(loop)
loop = asyncio.get_event_loop()
session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout,sock_read=timeout)
async with aiohttp.ClientSession(loop=loop , timeout=session_timeout , connector=aiohttp.TCPConnector(verify_ssl=False,limit=tcp_limit)) as session:
cwlist = [loop.create_task(Multiple_HTTP.fetch(session,url,semaphore)) for url in urls]
results = []
results = await asyncio.gather(*cwlist,return_exceptions=True)
return results
#----------------------------------------------------------------------------------
@staticmethod
def run(urls,timeout=5,N=60):
return asyncio.run(Multiple_HTTP.fetch_all(urls,timeout,N))
#----------------------------------------------------------------------------------
But now the ValueError: too many file descriptors in select()
is happening when running for a couple of minutes, instead of 3 hours as in previous version.
How should I correct my code to contain concurrency so that ValueError is not raised?