I tried different ways to get Selenium working in my Python script for web scraping in an online Jupyter notebook, without any success. I read many other instructions (like this, this) and answers (like this, this, this, this and many others) to similar questions, but nothing seems to work for me. In the virtual environment I downloaded both Firefox (v81.0) and geckodriver (v0.27) into the development folder, and I gave both all the permissions:
jupyterlab@jupyterlab-sps:/resources/testDevelop$ ls -l
total 7797
drwxrwsr-x 8 jupyterlab resources 4096 Oct 8 13:24 firefox
-rwxrwxrwx 1 jupyterlab resources 7274984 Oct 8 13:21 geckodriver
-rw-rw-r-- 1 jupyterlab resources 120 Oct 12 08:47 geckodriver.log
-rw-rw-r-- 1 jupyterlab resources 31813 Oct 12 09:42 testDevelop.ipynb
and
jupyterlab@jupyterlab-sps:/resources/testDevelop/firefox$ ls -l
total 165651
-rw-rw-r-- 1 jupyterlab resources 825 Sep 30 14:26 Throbber-small.gif
-rw-rw-r-- 1 jupyterlab resources 895 Sep 30 15:49 application.ini
drwxrwsr-x 4 jupyterlab resources 4096 Oct 8 13:24 browser
-rwxrwxr-x 1 jupyterlab resources 241720 Sep 30 16:28 crashreporter
-rw-rw-r-- 1 jupyterlab resources 4003 Sep 30 14:26 crashreporter.ini
drwxrwsr-x 3 jupyterlab resources 4096 Oct 8 13:24 defaults
-rw-rw-r-- 1 jupyterlab resources 174 Sep 30 16:28 dependentlibs.list
-rwxrwxr-x 1 jupyterlab resources 14656 Sep 30 16:28 firefox
-rwxrwxr-x 1 jupyterlab resources 569104 Sep 30 16:28 firefox-bin
-rw-rw-r-- 1 jupyterlab resources 1449 Sep 30 16:32 firefox-bin.sig
-rw-rw-r-- 1 jupyterlab resources 1449 Sep 30 16:32 firefox.sig
drwxrwsr-x 2 jupyterlab resources 4096 Oct 8 13:24 fonts
drwxrwsr-x 3 jupyterlab resources 4096 Oct 8 13:24 gmp-clearkey
drwxrwsr-x 2 jupyterlab resources 4096 Oct 8 13:24 gtk2
drwxrwsr-x 2 jupyterlab resources 4096 Oct 8 13:24 icons
-rwxrwxr-x 1 jupyterlab resources 895568 Sep 30 16:28 libfreeblpriv3.so
-rwxrwxr-x 1 jupyterlab resources 691064 Sep 30 16:28 libgraphitewasm.so
-rwxrwxr-x 1 jupyterlab resources 43408 Sep 30 16:28 liblgpllibs.so
-rwxrwxr-x 1 jupyterlab resources 2175768 Sep 30 16:28 libmozavcodec.so
-rwxrwxr-x 1 jupyterlab resources 220128 Sep 30 16:28 libmozavutil.so
-rwxrwxr-x 1 jupyterlab resources 14352 Sep 30 16:28 libmozgtk.so
-rwxrwxr-x 1 jupyterlab resources 113512 Sep 30 16:28 libmozsandbox.so
-rwxrwxr-x 1 jupyterlab resources 1207424 Sep 30 16:28 libmozsqlite3.so
-rwxrwxr-x 1 jupyterlab resources 18376 Sep 30 16:28 libmozwayland.so
-rwxrwxr-x 1 jupyterlab resources 243728 Sep 30 16:28 libnspr4.so
-rwxrwxr-x 1 jupyterlab resources 694896 Sep 30 16:28 libnss3.so
-rwxrwxr-x 1 jupyterlab resources 465616 Sep 30 16:28 libnssckbi.so
-rwxrwxr-x 1 jupyterlab resources 191728 Sep 30 16:28 libnssutil3.so
-rwxrwxr-x 1 jupyterlab resources 184120 Sep 30 16:28 liboggwasm.so
-rwxrwxr-x 1 jupyterlab resources 22872 Sep 30 16:28 libplc4.so
-rwxrwxr-x 1 jupyterlab resources 14592 Sep 30 16:28 libplds4.so
-rwxrwxr-x 1 jupyterlab resources 168024 Sep 30 16:28 libsmime3.so
-rwxrwxr-x 1 jupyterlab resources 326208 Sep 30 16:28 libsoftokn3.so
-rwxrwxr-x 1 jupyterlab resources 406208 Sep 30 16:28 libssl3.so
-rwxrwxr-x 1 jupyterlab resources 131841712 Sep 30 16:28 libxul.so
-rw-rw-r-- 1 jupyterlab resources 1449 Sep 30 16:32 libxul.so.sig
-rwxrwxr-x 1 jupyterlab resources 1260688 Sep 30 16:28 minidump-analyzer
-rw-rw-r-- 1 jupyterlab resources 26270759 Sep 30 16:32 omni.ja
-rwxrwxr-x 1 jupyterlab resources 614144 Sep 30 16:28 pingsender
-rw-rw-r-- 1 jupyterlab resources 166 Sep 30 16:28 platform.ini
-rwxrwxr-x 1 jupyterlab resources 564936 Sep 30 16:28 plugin-container
-rw-rw-r-- 1 jupyterlab resources 1449 Sep 30 16:32 plugin-container.sig
-rw-rw-r-- 1 jupyterlab resources 2017 Sep 30 16:32 precomplete
-rw-rw-r-- 1 jupyterlab resources 0 Sep 30 16:28 removed-files
-rw-rw-r-- 1 jupyterlab resources 132 Sep 30 16:28 update-settings.ini
-rwxrwxr-x 1 jupyterlab resources 101864 Sep 30 16:28 updater
-rw-rw-r-- 1 jupyterlab resources 638 Sep 30 16:28 updater.ini
I also added the paths of firefox and geckodriver to the PATH environment variable, that is:
jupyterlab@jupyterlab-sps:/resources/testDevelop/firefox$ echo $PATH
/resources/testDevelop:/resources/testDevelop/firefox:/resources/firefox:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/jupyterlab/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/jre1.8.0_211/bin:/home/jupyterlab/hadoop-2.9.2/bin:/home/jupyterlab/spark-2.4.3/bin
But if I try this code:
import os
import selenium
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
driver=Firefox(executable_path='/resources/testDevelop/geckodriver',)
I got this:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-23-e332a8e620e3> in <module>
6 from webdriver_manager.firefox import GeckoDriverManager
7
----> 8 driver=Firefox(executable_path='/resources/testDevelop/geckodriver',)
9 cap = DesiredCapabilities().FIREFOX
10 cap["marionette"] = False
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
177 else:
178 if self.binary is None:
--> 179 self.binary = FirefoxBinary()
180 if self.profile is None:
181 self.profile = FirefoxProfile()
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/firefox_binary.py in __init__(self, firefox_path, log_file)
45 self.command_line = None
46 if self._start_cmd is None:
---> 47 self._start_cmd = self._get_firefox_start_cmd()
48 if not self._start_cmd.strip():
49 raise WebDriverException(
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/firefox_binary.py in _get_firefox_start_cmd(self)
167 raise RuntimeError(
168 "Could not find firefox in your system PATH." +
--> 169 " Please specify the firefox binary location or install firefox")
170 return start_cmd
171
RuntimeError: Could not find firefox in your system PATH. Please specify the firefox binary location or install firefox
so I tried:
import os
import selenium
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = False
driver = os.path.normpath(os.path.join(os.getcwd(), 'geckodriver'))
binary = os.path.normpath(os.path.join(os.getcwd(), 'firefox', 'firefox'))
ff_binary = webdriver.firefox.firefox_binary.FirefoxBinary(firefox_path=binary, log_file='ff_log.log')
#driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
browser = webdriver.Firefox(firefox_binary=ff_binary, capabilities=cap, executable_path=driver)
But I got this strange error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-26-0bb63b20498c> in <module>
12 binary = os.path.normpath(os.path.join(os.getcwd(), 'firefox', 'firefox'))
13 ff_binary = webdriver.firefox.firefox_binary.FirefoxBinary(firefox_path=binary, log_file='ff_log.log')
---> 14 browser = webdriver.Firefox(firefox_binary=ff_binary, capabilities=cap, executable_path=driver)
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
189
190 executor = ExtensionConnection("127.0.0.1", self.profile,
--> 191 self.binary, timeout)
192 RemoteWebDriver.__init__(
193 self,
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/extension_connection.py in __init__(self, host, firefox_profile, firefox_binary, timeout)
50 self.profile.add_extension()
51
---> 52 self.binary.launch_browser(self.profile, timeout=timeout)
53 _URL = "http://%s:%d/hub" % (HOST, PORT)
54 RemoteConnection.__init__(
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/firefox_binary.py in launch_browser(self, profile, timeout)
70 self.profile = profile
71
---> 72 self._start_from_profile_path(self.profile.path)
73 self._wait_until_connectable(timeout=timeout)
74
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/firefox_binary.py in _start_from_profile_path(self, path)
93 self.process = Popen(
94 command, stdout=self._log_file, stderr=STDOUT,
---> 95 env=self._firefox_env)
96
97 def _wait_until_connectable(self, timeout=30):
~/conda/envs/python/lib/python3.6/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors)
685 (p2cread, p2cwrite,
686 c2pread, c2pwrite,
--> 687 errread, errwrite) = self._get_handles(stdin, stdout, stderr)
688
689 # We wrap OS handles *before* launching the child, otherwise a
~/conda/envs/python/lib/python3.6/subprocess.py in _get_handles(self, stdin, stdout, stderr)
1202 else:
1203 # Assuming file-like object
-> 1204 c2pwrite = stdout.fileno()
1205
1206 if stderr is None:
AttributeError: 'str' object has no attribute 'fileno'
I don't understand what is causing this error. I've checked the values of the paths individually and they seem correct, that is:
- binary returns: '/resources/StockScreener/firefox/firefox'
- driver returns: '/resources/StockScreener/geckodriver'
- ff_binary returns: <selenium.webdriver.firefox.firefox_binary.FirefoxBinary at 0x7f10b681e160>
I also tried to use GeckoDriverManager in this way:
import os
import selenium
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from webdriver_manager.firefox import GeckoDriverManager
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = False
driver = os.path.normpath(os.path.join(os.getcwd(), 'geckodriver'))
binary = os.path.normpath(os.path.join(os.getcwd(), 'firefox', 'firefox'))
ff_binary = webdriver.firefox.firefox_binary.FirefoxBinary(firefox_path=binary, log_file='ff_log.log')
#browser = webdriver.Firefox(firefox_binary=ff_binary, capabilities=cap, executable_path=driver)
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
and it returns:
[WDM] - Driver [/home/jupyterlab/.wdm/drivers/geckodriver/linux64/v0.27.0/geckodriver] found in cache
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-32-012cab2ea574> in <module>
13 ff_binary = webdriver.firefox.firefox_binary.FirefoxBinary(firefox_path=binary, log_file='ff_log.log')
14 #browser = webdriver.Firefox(firefox_binary=ff_binary, capabilities=cap, executable_path=driver)
---> 15 driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
16 #browser.get('http://google.com/')
17 #Simple assignment
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
177 else:
178 if self.binary is None:
--> 179 self.binary = FirefoxBinary()
180 if self.profile is None:
181 self.profile = FirefoxProfile()
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/firefox_binary.py in __init__(self, firefox_path, log_file)
45 self.command_line = None
46 if self._start_cmd is None:
---> 47 self._start_cmd = self._get_firefox_start_cmd()
48 if not self._start_cmd.strip():
49 raise WebDriverException(
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/firefox_binary.py in _get_firefox_start_cmd(self)
167 raise RuntimeError(
168 "Could not find firefox in your system PATH." +
--> 169 " Please specify the firefox binary location or install firefox")
170 return start_cmd
171
RuntimeError: Could not find firefox in your system PATH. Please specify the firefox binary location or install firefox
Consider that in all cases, I have:
jupyterlab@jupyterlab-sps:/resources/testDevelop/firefox$ whereis firefox
firefox: /resources/testDevelop/firefox /resources/testDevelop/firefox/firefox.sig /resources/testDevelop/firefox/firefox
Finally, if I write only:
import os
import selenium
from selenium import webdriver
from selenium.webdriver import Firefox
#from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#from webdriver_manager.firefox import GeckoDriverManager
driver=Firefox(executable_path='/resources/testDevelop/geckodriver')
I got the following error, after restarting the kernel without installing webdriver-manager:
---------------------------------------------------------------------------
SessionNotCreatedException Traceback (most recent call last)
<ipython-input-2-89dbd2507c70> in <module>
6 #from webdriver_manager.firefox import GeckoDriverManager
7
----> 8 driver=Firefox(executable_path='/resources/testDevelop/geckodriver')
9 #cap = DesiredCapabilities().FIREFOX
10 #cap["marionette"] = False
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
172 command_executor=executor,
173 desired_capabilities=capabilities,
--> 174 keep_alive=True)
175
176 # Selenium remote
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py in __init__(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)
155 warnings.warn("Please use FirefoxOptions to set browser profile",
156 DeprecationWarning, stacklevel=2)
--> 157 self.start_session(capabilities, browser_profile)
158 self._switch_to = SwitchTo(self)
159 self._mobile = Mobile(self)
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py in start_session(self, capabilities, browser_profile)
250 parameters = {"capabilities": w3c_caps,
251 "desiredCapabilities": capabilities}
--> 252 response = self.execute(Command.NEW_SESSION, parameters)
253 if 'sessionId' not in response:
254 response = response['value']
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
~/conda/envs/python/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
SessionNotCreatedException: Message: Unable to find a matching set of capabilities
It seems the issue is sensitive to version updates, so new releases can cause mismatches and problems. How can I solve this? Can you suggest a similar way to easily do web scraping (with JavaScript execution), maybe using other libraries?