I'm trying to scrape images from Ecosia image search. It works fine when I run it locally (apart from the error described in this link: "Failed to read descriptor from node connection: A device attached to the system is not functioning" error using ChromeDriver Selenium on Windows OS),
but when I deploy it to Heroku it raises a timeout exception.
from selenium import webdriver
import os
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

# Chrome configuration for a headless/containerized environment (Heroku).
# The buildpack exposes the browser binary and driver paths via env vars.
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-dev-shm-usage")  # /dev/shm is tiny in containers
chrome_options.add_argument("--no-sandbox")             # required under the unprivileged dyno user
chrome_options.add_argument("window-size=1400,800")

# Selenium 4 removed the `executable_path`/`chrome_options` keyword args;
# pass a Service object and `options=` instead.
driver = webdriver.Chrome(
    service=Service(os.environ.get("CHROMEDRIVER_PATH")),
    options=chrome_options,
)

delay = 50  # seconds to wait for the page's root element

driver.get('https://www.ecosia.org/images?q=asfasf')
print('success')

size = driver.get_window_size()
print("Window size: width = {}px, height = {}px".format(size["width"], size["height"]))

try:
    # Wait until the Nuxt application root is attached to the DOM.
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.ID, '__nuxt'))
    )
    print("Page is ready!")
except TimeoutException as e:
    print(e)
    print("Loading took too much time!")
finally:
    # `find_elements_by_tag_name` was removed in Selenium 4.3;
    # the By-based API below also works on Selenium 3.x.
    for img in driver.find_elements(By.TAG_NAME, 'img'):
        print(img.get_attribute("src"))
    driver.quit()  # always release the browser process (fixes the leak)
I tried changing the target website to Google and it ran fine, but it gives the same error when I change the ID that the program waits for.
It also doesn't print all of the image sources, so it doesn't load the full page even with the Google link:
from selenium import webdriver
import os
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

# Chrome configuration for a headless/containerized environment (Heroku).
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-dev-shm-usage")  # /dev/shm is tiny in containers
chrome_options.add_argument("--no-sandbox")             # required under the unprivileged dyno user
chrome_options.add_argument("window-size=1400,800")

# Selenium 4 removed the `executable_path`/`chrome_options` keyword args;
# pass a Service object and `options=` instead.
driver = webdriver.Chrome(
    service=Service(os.environ.get("CHROMEDRIVER_PATH")),
    options=chrome_options,
)

delay = 50  # seconds to wait for the marker element
final_word = 'hey'

driver.get('https://www.google.com/search?q=what&tbm=isch&ved=2ahUKEwjV6Jbil8bxAhWGFisKHU-sAaMQ2-cCegQIABAA&oq=what&gs_lcp=CgNpbWcQAzIFCAAQsQMyBQgAELEDMggIABCxAxCDATIFCAAQsQMyBQgAELEDMgUIABCxAzIFCAAQsQMyBQgAELEDMgUIABCxAzIFCAAQsQNQmhdY_hpg2xtoAHAAeACAAYEBiAHYApIBAzAuM5gBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=5PffYNXmC4atrAHP2IaYCg&bih=625&biw=1366&safe=images')
print('success')

size = driver.get_window_size()
print("Window size: width = {}px, height = {}px".format(size["width"], size["height"]))

try:
    # Wait until the results-page marker element is present in the DOM.
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'c7cjWc'))
    )
    print("Page is ready!")
except TimeoutException as e:
    print(e)
    print("Loading took too much time!")
finally:
    # Dump the rendered HTML for debugging what actually loaded.
    print(driver.page_source)
    # `find_elements_by_tag_name` was removed in Selenium 4.3;
    # the By-based API below also works on Selenium 3.x.
    for img in driver.find_elements(By.TAG_NAME, 'img'):
        print(img.get_attribute("src"))
    driver.quit()  # always release the browser process (fixes the leak)
Then I tried another website, called OceanHero, and it gave me the same error.