I've been trying for a long time to write the results to my file, but since this is a multithreaded task, the writes to the file end up interleaved.
The code that appends to the file is in the get_url function, and this function is launched via pool.submit(get_url, line):
import requests
from concurrent.futures import ThreadPoolExecutor
import fileinput
from bs4 import BeautifulSoup
import traceback
import threading
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import warnings

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')

count_requests = 0
host_error = 0

def get_url(url):
    try:
        global count_requests
        result_request = requests.get(url, verify=False)
        soup = BeautifulSoup(result_request.text, 'html.parser')
        with open('outfile.txt', 'a', encoding="utf-8") as f:
            f.write(soup.title.get_text())
        count_requests = count_requests + 1
    except:
        global host_error
        host_error = host_error + 1

with ThreadPoolExecutor(max_workers=100) as pool:
    for line in fileinput.input(['urls.txt']):
        pool.submit(get_url, line)
        print("requests success : " + str(count_requests) + " | requests error " + str(host_error), end='\r')
This is what the output looks like:
google.com - Google
w3schools.com - W3Schools Online Web Tutorials
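
For reference, one common fix is to serialize the writes with a threading.Lock, so that only one thread appends to outfile.txt at a time; another is to return the title from each future and do all the writing in the main thread. Below is a minimal sketch of the lock approach, assuming the same urls.txt / outfile.txt names as above; the write_lock name and the "url - title" line format (taken from the sample output) are my own additions:

import threading
import requests
from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup

write_lock = threading.Lock()  # hypothetical name; guards the shared output file

def get_url(url):
    try:
        result_request = requests.get(url, verify=False, timeout=10)
        soup = BeautifulSoup(result_request.text, 'html.parser')
        title = soup.title.get_text(strip=True) if soup.title else ''
        # Only one thread at a time may hold the lock, so each line is
        # appended whole and never interleaves with other threads' output.
        with write_lock:
            with open('outfile.txt', 'a', encoding='utf-8') as f:
                f.write(url.strip() + ' - ' + title + '\n')
    except Exception:
        pass  # the success/error counters from above could be updated here

with ThreadPoolExecutor(max_workers=100) as pool:
    with open('urls.txt') as urls:
        for line in urls:
            pool.submit(get_url, line.strip())

The with write_lock: block is the smallest change to the original code; opening the file once and funneling lines through a queue.Queue to a single writer thread would avoid reopening the file on every request, at the cost of more restructuring.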