I am trying to scrape this website "https://www.realtruck.com/clazzio-leather-seat-covers/". It contains 6 drop down menus which I am trying to iterate through to collect the Year, Make, Model, Submodel, SKU data for all options. Essentially one big giant loop. The first 4 drop downs are no problem however the last two are tricky. On each click the webelement "re-hides" and my objects become stale after the first iteration. The following contains my code so far:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import StaleElementReferenceException
import time
outFile = r'filepath.csv'
outHandler = open(outFile, 'w')
chrome_driver = r'\FMinerProjects\chromedriver.exe'
driver = webdriver.Chrome(chrome_driver)
driver.maximize_window()
driver.get("https://www.realtruck.com/clazzio-leather-seat-covers/")
driver.implicitly_wait(15)
driver.find_element_by_xpath("""//*[@id="newsletterPopupClosex"]""").click()
select_year = driver.find_element_by_xpath("""//[@id="d_ye"]/option[1]""").text
select_make = driver.find_element_by_xpath("""//*[@id="d_ma"]/option[1]""").text
select_model = driver.find_element_by_xpath("""//*[@id="d_mo"]/option[1]""").text
dropdowns = driver.find_element_by_xpath("""//*[@id="d_ye"]""")
dropoptions = dropdowns.find_elements_by_tag_name("option")
for values in dropoptions:
if values.text != select_year:
year = values.click()
time.sleep(3)
print values.text
makes = driver.find_element_by_xpath("""//*[@id="d_ma"]""")
make = makes.find_elements_by_tag_name("option")
for each in make:
if each.text != select_make:
each.click()
time.sleep(3)
print each.text
models = driver.find_element_by_xpath("""//*[@id="d_mo"]""")
model = models.find_elements_by_tag_name("option")
for sub in model:
if sub.text != select_model:
sub.click()
print sub.text
time.sleep(3)
bodies = driver.find_element_by_xpath("""//*[@id="d_bo"]""")
body = bodies.find_elements_by_tag_name("option")
for cab in body:
select_body = driver.find_element_by_xpath("""//*[@id="d_bo"]/option[1]""").text
if cab.text != select_body:
cab.click()
time.sleep(4)
urls = driver.current_url
link = '"' + urls + '"'
print link
driver.get(urls)
driver.implicitly_wait(5)
driver.find_element_by_xpath("""//*[@id="choose_options"]""").click()
time.sleep(2)
This is where my issue begins. The following elements become stale after the second iteration due to the dynamic page javascript hidding the elements after each click for the last two dropdowns
element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element)
lines = element.find_elements_by_tag_name("li")
for levels in lines:
select_submodel = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]/ul/li[1]""").text
if levels.text != select_submodel:
levels.click()
time.sleep(5)
sku = driver.find_element_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[5]/div[2]/a""").text
if sku:
print sku
element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element)
lines = element.find_elements_by_tag_name("li")
continue
else:
element_2 = driver.find_element_by_xpath("""//*[@id="group_189148_d_a1"]/div[2]""")
driver.execute_script("arguments[0].setAttribute('style', 'display: block;');", element_2)
last = element_2.find_elements_by_tag_name("li")
for end in last:
select_color = driver.find_element_by_xpath("""//*[@id="group_189148_d_a1"]/div[2]/ul/li[1]""").text
if end.text != select_color:
end.click()
time.sleep(2)
crazy = driver.find_element_by_xpath("""//*[@id="attribute_splitter"]/div""").text
print crazy
sku = driver.find_elements_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[5]/div[2]/a""")
price = driver.find_elements_by_xpath("""//*[@id="group_189148"]/table/tbody/tr/td[2]/div[2]/div[2]/div[2]/span""")
continue
I am trying to scrape data from multiple dropdowns on multiple pages. My issue is that I keep getting stale element references exceptions after the 2nd iteration since elements keep re-hiding.I assume the variables from the first page will go stale also. Please help.
The following contains the error report I am receiving. It occurs on line 62 "element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")" since the webelement becomes hidden after each click and therefore the objects that hold them become stale. Previous error reports reported that line 72 "if levels.text != select_submodel:" becomes stale after the second iteration. I have tried unhidden the element after each iteration like some other forms suggest but that does't work.
Traceback (most recent call last):
File "C:\Python27\Lib\site-packages\pythonwin\pywin\framework\scriptutils.py", line 326, in RunScript
exec codeObject in __main__.__dict__
File "C:\Users\marketing-x1-carbon\Documents\August2017_Files\IncompleteWebscrappers\real_truck_oct10.py", line 62, in <module>
element = driver.find_element_by_xpath("""//*[@id="group_189148_d_an"]/div[2]""")
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 365, in find_element_by_xpath
return self.find_element(by=By.XPATH, value=xpath)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 843, in find_element
'value': value})['value']
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 308, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="group_189148_d_an"]/div[2]"}