
I am trying to scrape this website: "https://www.realtruck.com/clazzio-leather-seat-covers/". It contains 6 drop-down menus which I am iterating through to collect the Year, Make, Model, Submodel and SKU data for every option, essentially one giant nested loop. The first 4 drop-downs are no problem; however, the last two are tricky. On each click the web element "re-hides" itself and my objects go stale after the first iteration. The following contains my code so far:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import StaleElementReferenceException
import time

outFile = r'filepath.csv'   
outHandler = open(outFile, 'w')
chrome_driver = r'\FMinerProjects\chromedriver.exe'
driver = webdriver.Chrome(chrome_driver)
driver.maximize_window()
driver.get("https://www.realtruck.com/clazzio-leather-seat-covers/")
driver.implicitly_wait(15)
driver.find_element_by_xpath("""//*[@id="newsletterPopupClosex"]""").click()
select_year = driver.find_element_by_xpath("""//*[@id="d_ye"]/option[1]""").text
select_make = driver.find_element_by_xpath("""//*[@id="d_ma"]/option[1]""").text
select_model = driver.find_element_by_xpath("""//*[@id="d_mo"]/option[1]""").text


dropdowns = driver.find_element_by_xpath("""//*[@id="d_ye"]""")
dropoptions = dropdowns.find_elements_by_tag_name("option")
for values in dropoptions:
    if values.text != select_year:
        values.click()
        time.sleep(3)
        print values.text
        makes = driver.find_element_by_xpath("""//*[@id="d_ma"]""")
        make = makes.find_elements_by_tag_name("option")
        for each in make:
            if each.text != select_make:
                each.click()
                time.sleep(3)
                print each.text
                models = driver.find_element_by_xpath("""//*[@id="d_mo"]""")
                model = models.find_elements_by_tag_name("option")
                for sub in model:
                    if sub.text != select_model:
                        sub.click()
                        print sub.text
                        time.sleep(3)
                        bodies = driver.find_element_by_xpath("""//*[@id="d_bo"]""")
                        body = bodies.find_elements_by_tag_name("option")
                        for cab in body:
                            select_body = driver.find_element_by_xpath("""//*[@id="d_bo"]/option[1]""").text
                            if cab.text != select_body:
                                cab.click()
                                time.sleep(4)
                                urls = driver.current_url
                                link = '"' + urls + '"'
                                print link
                                driver.get(urls)
                                driver.implicitly_wait(5)  
                                driver.find_element_by_xpath("""//*[@id="choose_options"]""").click()
                                time.sleep(2)

This is where my issue begins. The following elements become stale after the second iteration because the page's dynamic JavaScript re-hides the elements after each click on the last two drop-downs:

element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")

driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element)

lines = element.find_elements_by_tag_name("li")
for levels in lines:
    select_submodel = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]/ul/li[1]""").text
    if levels.text != select_submodel:
        levels.click()
        time.sleep(5)
        sku = driver.find_element_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[5]/div[2]/a""").text
        if sku:
            print sku
            element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")

            driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element)
            lines = element.find_elements_by_tag_name("li")                         
            continue

        else:
            element_2 = driver.find_element_by_xpath("""//*[@id="group_189148_d_a1"]/div[2]""")

            driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element_2)
            last = element_2.find_elements_by_tag_name("li")
            for end in last:
                select_color = driver.find_element_by_xpath("""//*[@id="group_189148_d_a1"]/div[2]/ul/li[1]""").text
                if end.text != select_color:
                    end.click()
                    time.sleep(2)
                    crazy = driver.find_element_by_xpath("""//*[@id="attribute_splitter"]/div""").text
                    print crazy
                    sku = driver.find_elements_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[5]/div[2]/a""")
                    price = driver.find_elements_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[2]/div[2]/span""")
                    continue

I am trying to scrape data from multiple drop-downs on multiple pages. My issue is that I keep getting StaleElementReferenceExceptions after the 2nd iteration because the elements keep re-hiding. I assume the variables from the first page will go stale as well. Please help.
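One thing I have been sketching out (a rough sketch only, not my working code; it re-uses driver and time from the script above and the same //*[@id="group_189148_d_an"]/div[2] container) is to stop holding on to the li WebElements at all and instead re-find the list by index on every pass, so a reference from before the click is never reused:

# Rough sketch: re-locate and un-hide the submodel container every time it
# is needed, and walk the <li> entries by index instead of keeping references.
submodel_xpath = """//*[@id="group_189148_d_an"]/div[2]"""

def show_list(xpath):
    # find the container fresh and force it visible before using it
    el = driver.find_element_by_xpath(xpath)
    driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", el)
    return el

option_count = len(show_list(submodel_xpath).find_elements_by_tag_name("li"))
for i in range(1, option_count):            # index 0 is the placeholder entry
    items = show_list(submodel_xpath).find_elements_by_tag_name("li")
    if i >= len(items):                     # the list can shrink after a click
        break
    items[i].click()
    time.sleep(3)

The idea is that nothing from a previous iteration survives the click, so there should be nothing left to go stale, but I am not sure this is the right direction.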

The following contains the error report I am receiving. It occurs on line 62, "element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")", since the web element becomes hidden after each click and the objects that hold it go stale. Previous error reports showed that line 72, "if levels.text != select_submodel:", becomes stale after the second iteration. I have tried un-hiding the element after each iteration, as some other forums suggest, but that doesn't work.

 Traceback (most recent call last):
 File "C:\Python27\Lib\site-packages\pythonwin\pywin\framework\scriptutils.py", line 326, in RunScript
 exec codeObject in __main__.__dict__
 File "C:\Users\marketing-x1-carbon\Documents\August2017_Files\IncompleteWebscrappers\real_truck_oct10.py", line 62, in <module>
 element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
 File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 365, in find_element_by_xpath
 return self.find_element(by=By.XPATH, value=xpath)
 File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 843, in find_element
 'value': value})['value']
 File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 308, in execute
 self.error_handler.check_response(response)
 File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
 raise exception_class(message, screen, stacktrace)
 NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="group_189148_d_an"]/div[2]"}
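Since the traceback shows a NoSuchElementException rather than the stale exception itself, I have also been experimenting with an explicit wait plus a short retry around the lookup. Again only a sketch (it re-uses driver and time from the script above; the helper name and attempt count are just placeholders of mine):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException

def click_option_when_ready(xpath, attempts=3):
    # wait for the element to be present, force it visible, then click;
    # retry a few times if the page swaps it out and the reference goes stale
    for attempt in range(attempts):
        try:
            el = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, xpath)))
            driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", el)
            el.click()
            return True
        except (StaleElementReferenceException, NoSuchElementException):
            time.sleep(1)
    return False

# e.g. click an entry in the same submodel container used above
# (the li index here is just an example):
click_option_when_ready("""//*[@id="group_189148_d_an"]/div[2]/ul/li[2]""")

Is something along these lines the usual way to deal with elements that keep getting re-hidden and re-rendered?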
  • Can you please share the exact error logs? Also please check https://stackoverflow.com/questions/27003423/python-selenium-stale-element-fix as well. – Diptman Oct 10 '17 at 21:20
  • Could you tell us which code line reports the error? – yong Oct 10 '17 at 23:09
  • There are too many for loops in your code; if any code inside a loop triggers a page reload or redirect, the stale exception will happen. So go through your code to check whether it contains such code. – yong Oct 10 '17 at 23:18
  • @yong There are 6 drop-downs. I do not see another way of looping without nesting loops. – jake_2017 Oct 11 '17 at 15:08
