I'm using the Selenium Chrome webdriver to crawl webpages one by one; each time I initialize a fresh driver instance and close it after the crawl finishes. After running this for a while, there are almost 10,000 leftover chrome processes on the system, and they can't be killed with the kill command. How can I handle this problem? Thanks~
The code is as follows:
from selenium import webdriver
from bs4 import BeautifulSoup

@classmethod
def get_content_by_selenium(cls, url):
    content = ''
    cls.driver = None  # make sure the attribute exists even if Chrome fails to start
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('--disable-extensions')
    chrome_options.add_argument('--disable-infobars')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--no-proxy-server')
    chrome_options.add_argument('--disable-dev-shm-usage')
    try:
        # executable_path is optional; if not specified, PATH is searched
        cls.driver = webdriver.Chrome(options=chrome_options,
                                      executable_path='/home/chromedriver')
        cls.driver.set_page_load_timeout(30)
        cls.driver.get(url)
        html = cls.driver.page_source
        soup = BeautifulSoup(html, 'html.parser')
        # strip <script> and <style> tags before extracting the visible text
        for script in soup(["script", "style"]):
            script.extract()
        meta = cls.get_meta(soup)
        text = ' '.join(soup.text.split())
        content = ' '.join([meta, text])
    except Exception as e:
        print(e)
        print('webdriver failed, continue running')
    finally:
        if cls.driver is not None:
            cls.driver.quit()
    return content
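
For context, the method lives on a class and is called once per URL, roughly like the sketch below (the class name Crawler and the URL list are placeholders, not my real code):

class Crawler:
    driver = None  # set by get_content_by_selenium (defined above)

    @classmethod
    def get_content_by_selenium(cls, url):
        ...  # body as shown above

# one short-lived driver per URL, quit in the finally block
urls = ['https://example.com/a', 'https://example.com/b']  # placeholder list
for url in urls:
    content = Crawler.get_content_by_selenium(url)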