I've been unable to run Selenium from within Databricks. I have followed the steps suggested by others in various other threads:
https://forums.databricks.com/questions/15480/how-to-add-webdriver-for-selenium-in-databricks.html
How to use Selenium in Databricks and accessing and moving downloaded files to mounted storage
cannot get selenium webdriver to work in azure databricks
My code currently looks like this:
%sh
# Install the Chromium browser on the machine that runs this cell, so that
# Selenium has a browser binary to launch.
# Register the PPA from which the Chromium package is pulled.
# NOTE(review): unlike the two apt commands below, this one is not piped
# through `yes` -- confirm it does not block waiting for an ENTER prompt.
sudo add-apt-repository ppa:canonical-chromium-builds/stage
# `yes` feeds a stream of "y" answers so the apt prompts are auto-confirmed.
/usr/bin/yes | sudo apt update
# NOTE(review): verify this really installs a runnable /usr/bin/chromium-browser
# on this image (on some Ubuntu releases the package is presumably only a
# snap wrapper) -- the traceback reports the browser exiting right at startup.
/usr/bin/yes | sudo apt install chromium-browser
# Notebook cell: fetch a chromedriver, start headless Chromium through
# Selenium, and run a one-page smoke test against google.com.
import os  # not referenced anywhere in this snippet
from webdrivermanager import ChromeDriverManager
from selenium import webdriver
# Download and install a chromedriver binary; element [0] of the result is
# used below as the driver executable path.
# NOTE(review): confirm the downloaded chromedriver version matches the
# installed Chromium version -- nothing here pins either one.
cdd = ChromeDriverManager().download_and_install()
chrome_options = webdriver.ChromeOptions()
# Chrome command-line switches intended for a display-less container:
chrome_options.add_argument('--no-sandbox')  # run without the Chrome sandbox
chrome_options.add_argument('--headless')  # no visible browser window
chrome_options.add_argument('--disable-dev-shm-usage')  # avoid relying on /dev/shm
chrome_options.add_argument('--remote-debugging-port=9009')  # fixed DevTools port
# This is the call that raises the WebDriverException shown in the traceback.
driver = webdriver.Chrome(executable_path=cdd[0], options=chrome_options)
# Test driver connection
driver.get("https://www.google.com")
# Read the alt text of the first <img> on the page as a sanity check.
driver.find_element_by_css_selector("img").get_attribute("alt")
Exception:
---------------------------------------------------------------------------
WebDriverException Traceback (most recent call last)
<command-2232618947863762> in <module>
12 chrome_driver = "/usr/bin/chromedriver"
13
---> 14 driver = webdriver.Chrome(executable_path=cdd[0], options=chrome_options)
15
16 # Test driver connection
/databricks/python/lib/python3.8/site-packages/selenium/webdriver/chrome/webdriver.py in __init__(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, keep_alive)
74
75 try:
---> 76 RemoteWebDriver.__init__(
77 self,
78 command_executor=ChromeRemoteConnection(
/databricks/python/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py in __init__(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)
155 warnings.warn("Please use FirefoxOptions to set browser profile",
156 DeprecationWarning, stacklevel=2)
--> 157 self.start_session(capabilities, browser_profile)
158 self._switch_to = SwitchTo(self)
159 self._mobile = Mobile(self)
/databricks/python/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py in start_session(self, capabilities, browser_profile)
250 parameters = {"capabilities": w3c_caps,
251 "desiredCapabilities": capabilities}
--> 252 response = self.execute(Command.NEW_SESSION, parameters)
253 if 'sessionId' not in response:
254 response = response['value']
/databricks/python/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
/databricks/python/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally.
(chrome not reachable)
(The process started from chrome location /usr/bin/chromium-browser is no longer running, so ChromeDriver is assuming that Chrome has crashed.)