I'm trying to scrape images from Ecosia image search. It works fine when I run it locally (apart from the error described in this link: "Failed to read descriptor from node connection: A device attached to the system is not functioning" error using ChromeDriver Selenium on Windows OS),
but when I deploy it to Heroku it raises a timeout exception.
from selenium import webdriver
import os
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

# Chrome configuration for a headless/containerized environment (Heroku).
# The buildpack exposes the browser binary and driver paths via env vars.
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-dev-shm-usage")  # /dev/shm is tiny in containers
chrome_options.add_argument("--no-sandbox")             # required under the unprivileged dyno user
chrome_options.add_argument("window-size=1400,800")

# Selenium 4 removed the `executable_path`/`chrome_options` keyword args;
# pass a Service object and `options=` instead.
driver = webdriver.Chrome(
    service=Service(os.environ.get("CHROMEDRIVER_PATH")),
    options=chrome_options,
)

delay = 50  # seconds to wait for the page's root element

driver.get('https://www.ecosia.org/images?q=asfasf')
print('success')

size = driver.get_window_size()
print("Window size: width = {}px, height = {}px".format(size["width"], size["height"]))

try:
    # Wait until the Nuxt application root is attached to the DOM.
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.ID, '__nuxt'))
    )
    print("Page is ready!")
except TimeoutException as e:
    print(e)
    print("Loading took too much time!")
finally:
    # `find_elements_by_tag_name` was removed in Selenium 4.3;
    # the By-based API below also works on Selenium 3.x.
    for img in driver.find_elements(By.TAG_NAME, 'img'):
        print(img.get_attribute("src"))
    driver.quit()  # always release the browser process (fixes the leak)
I tried changing the target website to Google and it ran fine, but it gives the same error when I change the ID that the program waits for.
It also doesn't print all of the image sources, so it doesn't load the full page even with the Google link:
from selenium import webdriver
import os
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

# Chrome configuration for a headless/containerized environment (Heroku).
chrome_options = webdriver.ChromeOptions()
chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-dev-shm-usage")  # /dev/shm is tiny in containers
chrome_options.add_argument("--no-sandbox")             # required under the unprivileged dyno user
chrome_options.add_argument("window-size=1400,800")

# Selenium 4 removed the `executable_path`/`chrome_options` keyword args;
# pass a Service object and `options=` instead.
driver = webdriver.Chrome(
    service=Service(os.environ.get("CHROMEDRIVER_PATH")),
    options=chrome_options,
)

delay = 50  # seconds to wait for the marker element
final_word = 'hey'

driver.get('https://www.google.com/search?q=what&tbm=isch&ved=2ahUKEwjV6Jbil8bxAhWGFisKHU-sAaMQ2-cCegQIABAA&oq=what&gs_lcp=CgNpbWcQAzIFCAAQsQMyBQgAELEDMggIABCxAxCDATIFCAAQsQMyBQgAELEDMgUIABCxAzIFCAAQsQMyBQgAELEDMgUIABCxAzIFCAAQsQNQmhdY_hpg2xtoAHAAeACAAYEBiAHYApIBAzAuM5gBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=5PffYNXmC4atrAHP2IaYCg&bih=625&biw=1366&safe=images')
print('success')

size = driver.get_window_size()
print("Window size: width = {}px, height = {}px".format(size["width"], size["height"]))

try:
    # Wait until the results-page marker element is present in the DOM.
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'c7cjWc'))
    )
    print("Page is ready!")
except TimeoutException as e:
    print(e)
    print("Loading took too much time!")
finally:
    # Dump the rendered HTML for debugging what actually loaded.
    print(driver.page_source)
    # `find_elements_by_tag_name` was removed in Selenium 4.3;
    # the By-based API below also works on Selenium 3.x.
    for img in driver.find_elements(By.TAG_NAME, 'img'):
        print(img.get_attribute("src"))
    driver.quit()  # always release the browser process (fixes the leak)
Then I tried another website, called OceanHero, and it gave me the same error.