#scrape.py
import threading
from selenium import webdriver
from selenium.common.exceptions import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options


threadLocal = threading.local()

def get_driver():
    browser = getattr(threadLocal, 'browser', None)
    if browser is None:
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument("--headless")
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument("--lang=en")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
        chrome_options.add_experimental_option('useAutomationExtension', False)
        chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
        chrome_options.binary_location = "/usr/bin/google-chrome"
        browser = webdriver.Chrome(executable_path=r'/usr/local/bin/chromedriver', options=chrome_options)
        setattr(threadLocal, 'browser', browser)
    return browser

def run_scrape(link):
    browser = get_driver()
    browser.get(link)
    try:
        # scrape process
        pass
    except:
        # other stuff
        pass
#multiprocess.py
from scrape import run_scrape
from multiprocessing.pool import ThreadPool
import time
if __name__ == '__main__':
    start_time = time.time()
    #links = list of links to be scraped
    pool = ThreadPool(20)
    results = pool.map(run_scrape, links)
    print("Total Time Processed: "+"--- %s seconds ---" % (time.time() - start_time))

The chromedriver exits after the workers finish, but my question is: are there instances of the Chrome browser that don't exit? Is there a way that I can store it also in the threadLocal?

1 Answer

As you instantiate one WebDriver instance for each thread, this approach will launch multiple Browsing Contexts, but conceptually your program looks good to go.

To exit the ChromeDriver / Chrome sessions from the thread pool, you have to invoke quit() individually for each worker thread that gets instantiated. However, I would avoid any attempt to store any session within the threadLocal, to avoid a potential crash.

So, I modified your code a bit, adding quit(), which is invoked individually for each worker thread that gets instantiated, and here are the execution results:

  • Code Block:

    • multiprocess.py:

      from scrape import run_scrape
      from multiprocessing.pool import ThreadPool
      import time
      
      if __name__ == '__main__':
          start_time = time.time()
          links = ["https://selenium.dev/downloads/", "https://selenium.dev/documentation/en/"] 
          pool = ThreadPool(20)
          results = pool.map(run_scrape, links)
          print("Total Time Processed: "+"--- %s seconds ---" % (time.time() - start_time)) 
      
    • scrape.py:

      #scrape.py
      import threading
      from selenium import webdriver
      from selenium.common.exceptions import NoSuchElementException, TimeoutException
      from selenium.webdriver.chrome.options import Options
      
      
      threadLocal = threading.local()
      
      def get_driver():
          browser = getattr(threadLocal, 'browser', None)
          if browser is None:
              chrome_options = Options()
              chrome_options.add_argument('--no-sandbox')
              chrome_options.add_argument("--headless")
              chrome_options.add_argument('--disable-dev-shm-usage')
              chrome_options.add_argument("--lang=en")
              chrome_options.add_argument("--start-maximized")
              chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
              chrome_options.add_experimental_option('useAutomationExtension', False)
              chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
              chrome_options.binary_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
              browser = webdriver.Chrome(executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe', options=chrome_options)
              setattr(threadLocal, 'browser', browser)
          return browser
      
      def run_scrape(link):
          browser = get_driver()
          browser.get(link)
          try:
              print(browser.title)
          except (NoSuchElementException, TimeoutException):
              print("Error")
          browser.quit()
      
  • Console Output:

    Downloads
    The Selenium Browser Automation Project :: Documentation for Selenium
    Total Time Processed: --- 10.329657554626465 seconds ---
    
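As a hedged refinement of the modified code above (my own sketch, not part of the original answer), you can move quit() into a finally block inside run_scrape() so that each worker's Chrome instance is closed even when browser.get() or the scraping step raises; it reuses get_driver() and the exception imports from scrape.py shown above:

    # Variant of run_scrape() from scrape.py above -- a hedged sketch.
    # quit() sits in a finally block so the per-task Chrome instance is
    # torn down even if navigation or scraping raises an exception.
    def run_scrape(link):
        browser = get_driver()
        try:
            browser.get(link)
            print(browser.title)
        except (NoSuchElementException, TimeoutException):
            print("Error")
        finally:
            browser.quit()

Because quit() runs once per task, the driver cached in threadLocal is not reusable for a thread's next link; each task effectively follows a create-use-quit cycle, which is consistent with the advice above not to rely on keeping the session in threadLocal.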
undetected Selenium