When I use the third-party aiobotocore library, it works up to NUM_WORKERS=500, but if I try to go up to 1000 I get this error:
r, w, _ = self._select(self._readers, self._writers, [], timeout)
File ".....\lib\selectors.py", line 314, in _select
r, w, x = select.select(r, w, w, timeout)
ValueError: too many file descriptors in select()
Is there a way to execute 1000 invocations in parallel?
Source:
import os, sys, time, json
import asyncio
from itertools import chain
from typing import List
import logging
from functools import partial
from pprint import pprint
# Third Party
import asyncpool
import aiobotocore.session
import aiobotocore.config
# Degree of parallelism: used as the worker-pool size, as the HTTP
# connection-pool size (max_pool_connections) and as the number of
# Lambda invocations that submit() pushes onto the pool.
_NUM_WORKERS=500
async def execute_lambda(lambda_name: str, key: str, client) -> List[str]:
    """
    Invoke a Lambda function once and return its streamed response payload.

    :param lambda_name: name of the Lambda function to invoke
    :param key: value sent to the function as ``exec_id`` in the JSON payload
    :param client: async Lambda client exposing an awaitable ``invoke``
    :return: list of decoded payload chunks, in the order they were received
    """
    # Synchronous invocation: the call completes only once the function returns.
    response = await client.invoke(
        InvocationType='RequestResponse',
        FunctionName=lambda_name,
        LogType='Tail',
        Payload=json.dumps({
            'exec_id': key,
        })
    )
    # The payload is an async stream; drain it fully so the caller receives
    # plain strings rather than an open stream.
    return [chunk.decode() async for chunk in response['Payload']]
async def submit(lambda_name: str, num_workers=None) -> List[str]:
    """
    Invoke an AWS Lambda function many times concurrently and collect the output.

    Each invocation receives a distinct ``exec_id`` in ``range(num_workers)``.

    :param lambda_name: name of the Lambda function to invoke
    :param num_workers: optional int used as the worker-pool size, the HTTP
        connection-pool size and the number of invocations; defaults to the
        module-level ``_NUM_WORKERS`` when omitted
    :return: flat list of the decoded payload chunks of every invocation,
        in submission order
    """
    if num_workers is None:
        num_workers = _NUM_WORKERS

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()

    session = aiobotocore.session.AioSession()
    # Size the HTTP connection pool to match the worker count so that workers
    # are not serialized behind a smaller connection pool.
    config = aiobotocore.config.AioConfig(max_pool_connections=num_workers)

    futures = []
    async with session.create_client('lambda', region_name='us-west-2', config=config) as client:
        worker_co = partial(execute_lambda, lambda_name)
        async with asyncpool.AsyncPool(None, num_workers, 'lambda_work_queue', logger, worker_co,
                                       return_futures=True, raise_on_join=True,
                                       log_every_n=10) as work_pool:
            for exec_id in range(num_workers):
                # With return_futures=True, push() hands back a future for the job.
                futures.append(await work_pool.push(exec_id, client))

    # The futures are only read after the pool context has exited.
    # NOTE(review): presumably exiting the pool joins all workers - confirm
    # against the asyncpool documentation.
    results = [future.result() for future in futures]
    return list(chain.from_iterable(results))
def main(name, files):
    """
    Run submit() to completion, pretty-print its result and report the elapsed time.

    :param name: name of the Lambda function to invoke
    :param files: unused; kept so existing callers keep working
    """
    started = time.perf_counter()

    # On Windows the selector-based event loop is built on select(), which
    # cannot watch more than 512 sockets and fails with
    # "ValueError: too many file descriptors in select()". The proactor loop
    # (I/O completion ports) has no such cap, so request it explicitly; it is
    # only the default from Python 3.8 onwards.
    if sys.platform == 'win32':
        loop = asyncio.ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        result = loop.run_until_complete(submit(name))
    finally:
        # Do not leave a closed loop registered as the current event loop.
        asyncio.set_event_loop(None)
        loop.close()

    pprint(result)
    elapsed = time.perf_counter() - started
    print(f"{__file__} executed in {elapsed:0.2f} seconds.")
Lambda function:
import time
def lambda_handler(event, context):
    """
    Simulate a slow job: wait ten seconds, then echo the caller's exec_id.

    :param event: invocation payload; must contain the key ``exec_id``
    :param context: Lambda runtime context (not used)
    :return: dict with ``code`` set to 0 and the received ``exec_id``
    """
    time.sleep(10)
    reply = {'code': 0}
    reply['exec_id'] = event['exec_id']
    return reply
Result:
'{"code": 0, "exec_id": 0}',
'{"code": 0, "exec_id": 1}',
'{"code": 0, "exec_id": 2}',
'{"code": 0, "exec_id": 3}',
...
'{"code": 0, "exec_id": 496}',
'{"code": 0, "exec_id": 497}',
'{"code": 0, "exec_id": 498}',
'{"code": 0, "exec_id": 499}']
my_cli_script.py executed in 14.56 seconds.