I'm currently writing a script that performs a lot of Google searches with Selenium, clicks the first Google result, and finally returns the current URL.
The problem is that after roughly 40 searches, I always get the Google reCAPTCHA, which is a huge problem for me. I don't want Selenium to solve the CAPTCHA, of course, but I'd love to know a way to never encounter it in the first place, even if that means a longer execution time.
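The only mitigation I can think of so far is throttling the queries with randomized pauses. A minimal sketch of what I mean by "more execution time" (the 5-15 second bounds are just a guess on my part, not values I know Google tolerates):

import random
import time

def polite_sleep(min_s=5.0, max_s=15.0):
    # Sleep a random amount between queries; the bounds are arbitrary guesses
    time.sleep(random.uniform(min_s, max_s))

Even with a pause like this between every search, I don't know whether that reliably avoids the reCAPTCHA.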
Here is my code:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import os
import pandas as pd
import numpy as np
from fake_useragent import UserAgent
from selenium.webdriver.chrome.options import Options
path = r"C:\Logiciels\Anaconda3\WebBrowsers\chromedriver.exe"
url = []
# Copy input to output
output_table = input_table.copy()
"""
options = Options()
ua = UserAgent(verify_ssl=False, use_cache_server=False)
userAgent = ua.random
options.add_argument(f'user-agent={userAgent}')
"""
browser = webdriver.Chrome(executable_path=path)
browser.maximize_window()
browser.get('http://www.google.com')
wait = WebDriverWait(browser, 5)
iframe = wait.until(EC.element_to_be_clickable([By.CSS_SELECTOR, '#cnsw > iframe']))
browser.switch_to.frame(iframe)
browser.find_element_by_xpath("//span[contains(@class, 'RveJvd snByac') and contains(text(), 'accepte')]").click()
browser.switch_to.parent_frame()
for name, code, city in zip(output_table["Nom etablissement"], output_table["Code postal"], output_table["Commune"]):
browser.get('http://www.google.com')
time.sleep(1)
search = browser.find_element_by_name('q')
req = "{0} {1} {2} site:societe.com".format(name, code, city)
try:
search.send_keys(req)
search.send_keys(Keys.RETURN)
time.sleep(1)
browser.find_element_by_tag_name("cite").click()
url.append(browser.current_url)
time.sleep(1)
except NoSuchElementException:
url.append(np.nan)
time.sleep(1)
browser.quit()
output_table["url"] = url
I tried to use a fake user agent, but it didn't work, perhaps because of my company proxy.
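For reference, here is roughly how I would expect to combine a random user agent with the company proxy through Chrome options. The proxy address is a placeholder, and I'm not sure fake_useragent can even download its data from behind that proxy:

from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

path = r"C:\Logiciels\Anaconda3\WebBrowsers\chromedriver.exe"

options = Options()
ua = UserAgent(verify_ssl=False, use_cache_server=False)
options.add_argument(f'user-agent={ua.random}')
# Placeholder address; the real corporate proxy would go here
options.add_argument('--proxy-server=http://proxy.example.com:8080')

browser = webdriver.Chrome(executable_path=path, options=options)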
Do you have any idea or solution for me?
Thank you very much,
Vincent