I was hoping someone could take a look at my code and explain why I am seeing runaway memory usage in chrome.exe processes. When I run the program, everything seems stable for a few hours, but after around 8 hours a single chrome.exe process will be consuming around 5 GB of memory. The application is fairly simple: for each item that I want to search for, a new process is created. Inside that process I create a single driver instance and then search for an element. If the element isn't present, I refresh the driver and continue searching. Here is a generic sample of my code:
import time
from multiprocessing import Process
import datetime as dt
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import selenium.common.exceptions as SE
import sendMail
class itemSearch(Process):
    """Worker process that polls one URL until a target button can be clicked.

    Each instance owns its own Chrome driver. The page is refreshed until the
    button is found; the button is then clicked and a notification e-mail
    containing the current URL is sent.

    Note: the process is started from ``__init__``, so constructing an
    instance immediately launches the worker.
    """

    def __init__(self, item):
        """Store the URL to watch and start the worker process.

        Args:
            item: URL of the page to poll.
        """
        Process.__init__(self)
        self.item = item
        # Must exist before run() tests it; the original left this
        # assignment commented out, so the first `while not self.done`
        # raised AttributeError.
        self.done = False
        print("Starting Search for: "+str(self.item))
        self.start()

    def run(self):
        """Create the driver, poll until the search succeeds, then shut down.

        The driver is created here rather than in ``__init__``: creating it
        in ``__init__`` produced ``PermissionError: [WinError 5]``.
        NOTE(review): presumably because the Process object is pickled for
        the child on Windows and a live driver cannot be pickled - confirm.
        """
        self.done = False
        self.driver = None
        try:
            options = Options()
            options.add_experimental_option("detach", True)
            self.driver = webdriver.Chrome(options=options)
            self.wait = WebDriverWait(self.driver, timeout=20)
            self.session = self.driver.session_id
            self.driver.get(self.item)
            while not self.done:
                self.search()
        finally:
            # quit() ends the whole session (browser and chromedriver);
            # close() only closes the current window. Doing it in `finally`
            # ensures the browser is released even if the search raises.
            if self.driver is not None:
                self.driver.quit()

    def search(self):
        """Refresh the page until the button is found, click it and notify.

        Sets ``self.done`` to True once the notification has been sent.
        """
        while True:
            print("Scanning for: "+str(self.item))
            try:
                # find_element(By.XPATH, ...) replaces the
                # find_element_by_xpath helper, which newer Selenium
                # releases no longer provide.
                self.driver.find_element(
                    By.XPATH, '//div[some xpath to a button]').click()
            except SE.NoSuchElementException:
                self._refresh_and_wait()
                continue
            print("sending email")
            url = self.driver.current_url
            sendMail.sendNotification(receiver_email="yourmail.com", url=url)
            break
        self.done = True

    def _refresh_and_wait(self):
        """Reload the page and wait (up to the wait timeout) for it to render."""
        print("Refreshing")
        self.driver.refresh()
        print(dt.datetime.now())
        try:
            self.wait.until(EC.visibility_of_element_located(
                (By.XPATH, '//div[some other xpath]')))
        except SE.TimeoutException:
            # A slow page load should not kill the worker; the caller's
            # loop simply scans and refreshes again.
            print("Timed out waiting for page to load; retrying")
if __name__ == '__main__':
    # Entry point: launch one itemSearch worker process per URL.
    url1 = "https://www.somesite.com"
    # The closing quote was missing here, which made the script a SyntaxError.
    url2 = "https://www.someothersite.com"
    searchItems = [url1, url2]
    print("Starting search")
    workers = []
    for item in searchItems:
        print(item)
        # Constructing itemSearch starts the process; keep the handle so the
        # parent can wait for it explicitly.
        workers.append(itemSearch(item))
    for worker in workers:
        worker.join()
As a workaround, I added a function that checks the memory usage of all chrome.exe processes. The check is run on each loop iteration. I have set a maximum memory limit, and once that limit is reached I close the Chrome driver and call the run function again. This is actually working very well for me. Here is the new code with the function incorporated:
import time
import psutil
from multiprocessing import Process
import datetime as dt
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import selenium.common.exceptions as SE
import sendMail
class itemSearch(Process):
    """Worker process that polls one URL until a target button can be clicked.

    Each instance owns its own Chrome driver. The page is refreshed until the
    button is found; the button is then clicked and a notification e-mail
    containing the current URL is sent. After every refresh the memory used
    by chrome.exe processes is measured, and the driver is replaced with a
    fresh one when it exceeds ``MAX_MEMORY_MB``.

    Note: the process is started from ``__init__``, so constructing an
    instance immediately launches the worker.
    """

    # Memory threshold (MB, as reported by getMemoryUsage) above which the
    # driver is restarted.
    MAX_MEMORY_MB = 7000

    def __init__(self, item):
        """Store the URL to watch and start the worker process.

        Args:
            item: URL of the page to poll.
        """
        Process.__init__(self)
        self.item = item
        # Must exist before run() tests it; the original left this
        # assignment commented out, so the first `while not self.done`
        # raised AttributeError.
        self.done = False
        self.driver = None
        print("Starting Search for: "+str(self.item))
        self.start()

    def _start_driver(self):
        """Create a new Chrome driver and load the watched URL.

        The driver is created inside the child process rather than in
        ``__init__``: creating it in ``__init__`` produced
        ``PermissionError: [WinError 5]``.
        NOTE(review): presumably because the Process object is pickled for
        the child on Windows and a live driver cannot be pickled - confirm.
        """
        options = Options()
        options.add_experimental_option("detach", True)
        self.driver = webdriver.Chrome(options=options)
        self.wait = WebDriverWait(self.driver, timeout=20)
        self.session = self.driver.session_id
        self.driver.get(self.item)

    def _stop_driver(self):
        """Quit the current driver, if any, and forget it."""
        if self.driver is not None:
            try:
                # quit() ends the whole session (browser and chromedriver);
                # close() only closes the current window.
                self.driver.quit()
            finally:
                self.driver = None

    def run(self):
        """Create the driver, poll until the search succeeds, then shut down."""
        self.done = False
        self.driver = None
        try:
            self._start_driver()
            while not self.done:
                self.search()
        finally:
            # Release the browser even if the search raises.
            self._stop_driver()

    def getMemoryUsage(self):
        """Return the MB of RAM being used by chrome.

        Sums the memory of every process named "chrome.exe" on the machine,
        not only the ones started by this worker.
        """
        total_mem = 0
        for proc in psutil.process_iter(['name']):
            if proc.info['name'] != "chrome.exe":
                continue
            try:
                mem = proc.memory_info()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process exited or is not ours to inspect; skip it.
                continue
            # `private` is only reported on Windows; fall back to the
            # resident set size elsewhere.
            total_mem += getattr(mem, 'private', mem.rss)
        return total_mem/1000000

    def search(self):
        """Refresh the page until the button is found, click it and notify.

        Sets ``self.done`` to True once the notification has been sent.
        """
        while True:
            print("Scanning for: "+str(self.item))
            try:
                # find_element(By.XPATH, ...) replaces the
                # find_element_by_xpath helper, which newer Selenium
                # releases no longer provide.
                self.driver.find_element(
                    By.XPATH, '//div[some xpath to a button]').click()
            except SE.NoSuchElementException:
                self._refresh_and_wait()
                self._restart_if_over_limit()
                continue
            print("sending email")
            url = self.driver.current_url
            sendMail.sendNotification(receiver_email="yourmail.com", url=url)
            break
        self.done = True

    def _refresh_and_wait(self):
        """Reload the page and wait (up to the wait timeout) for it to render."""
        print("Refreshing")
        self.driver.refresh()
        print(dt.datetime.now())
        try:
            self.wait.until(EC.visibility_of_element_located(
                (By.XPATH, '//div[some other xpath]')))
        except SE.TimeoutException:
            # A slow page load should not kill the worker; the caller's
            # loop simply scans and refreshes again.
            print("Timed out waiting for page to load; retrying")

    def _restart_if_over_limit(self):
        """Replace the driver with a fresh one when chrome uses too much RAM."""
        # Function-scope import: the file's import block does not provide it.
        import logging

        memUsage = self.getMemoryUsage()
        print("Current Memory Usage at: "+str(memUsage)+"MB")
        if memUsage > self.MAX_MEMORY_MB:
            # The original referenced an undefined `logger` (NameError).
            # Logged at WARNING so it is visible without logging configuration.
            logging.getLogger(__name__).warning(
                "Memory Usage reached " +str(memUsage) +"MB. Restarting driver")
            # Restart in place. The original called self.run() from here,
            # which nested one run()/search() frame per restart and, once the
            # inner run() returned, resumed this loop on a closed driver.
            self._stop_driver()
            self._start_driver()
if __name__ == '__main__':
    # Entry point: launch one itemSearch worker process per URL.
    url1 = "https://www.somesite.com"
    # The closing quote was missing here, which made the script a SyntaxError.
    url2 = "https://www.someothersite.com"
    searchItems = [url1, url2]
    print("Starting search")
    workers = []
    for item in searchItems:
        print(item)
        # Constructing itemSearch starts the process; keep the handle so the
        # parent can wait for it explicitly.
        workers.append(itemSearch(item))
    for worker in workers:
        worker.join()