The following site status-checking script keeps getting stuck at around 99%.
Timeout has been assigned as None to mitigate this, but the problem recurs nevertheless.
What is causing the script to fail at the end, and how can I make it so that Python can know when asyncio is stuck and do something about it?
import aiohttp, asyncio, platform, requests, re
from aiohttp import ClientSession, TCPConnector
import asyncio
from operator import contains
from tqdm import tqdm as tqdm
from tqdm.asyncio import tqdm as asyncProgress
from requests import Session
session = Session()
internet_archive = "https://web.archive.org/cdx/search/cdx?url=twitter.com/iamcardib/status&matchType=prefix&filter=statuscode:200&mimetype:text/html&from=20220302"
cdx = requests.get(internet_archive).text
urls = re.findall(r'https?://twitter\.com/(?:#!/)?\w+/status/\d+', cdx)
async def CheckSiteStatus(url, session: ClientSession, Sem: asyncio.Semaphore):
async with Sem:
async with session.get(url) as resp:
return resp.real_url, resp.status
async def StatusCheck(links_list):
statuses = []
timeout = aiohttp.ClientTimeout(total=None)
async with ClientSession(timeout=timeout) as session:
Sem = asyncio.Semaphore(15)
statuses = await asyncProgress.gather(*(CheckSiteStatus(u, session, Sem) for u in links_list))
return statuses
if platform.system() == 'Windows':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
end_list = asyncio.run(StatusCheck(urls))