I'm trying to scrape Stack Overflow using Python Selenium.
I get a StaleElementReferenceException
for a few elements when I try to print the link (href) of each question.
Below is the exception which is seen
selenium.common.exceptions.StaleElementReferenceException: Message: The element reference is stale. Either the element is no longer attached to the DOM or the page has been refreshed.
I've tried both
WebDriverWait(driver, 3).until(EC.presence_of_element_located(
(By.XPATH, xpath)))
and
driver.implicitly_wait(30)
I have already checked a few related questions and couldn't solve this problem: StaleElementReferenceException: Element is no longer attached to the DOM: Selenium
Code snippet
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
driver = webdriver.Firefox()
driver.get("https://stackoverflow.com/questions")
try:
WebDriverWait(driver, 3).until(EC.presence_of_element_located(
(By.XPATH, '//div[@id="tabs"]/a[@class="youarehere"]')))
print "Page is ready!"
except TimeoutException:
print "Time exceeded"
noofquestionsperpage = driver.find_element_by_xpath(
'//div[@class="page-sizer fr"]/a[@class="page-numbers current"]').text
requirednumberofposts = 30
numberofpagesclick = requirednumberofposts / int(noofquestionsperpage)
print numberofpagesclick
if numberofpagesclick > 1:
for i in range(numberofpagesclick):
for a in driver.find_elements_by_xpath('//*[@id="questions"]/div[@class="question-summary"]'):
try:
WebDriverWait(driver, 3).until(EC.presence_of_element_located(
(By.XPATH, './/div[@class="summary"]/h3/a')))
print a.find_element_by_xpath('.//div[@class="summary"]/h3/a').get_attribute('href')
except Exception, e:
print str(e).strip()
try:
driver.find_element_by_xpath(
'//div[@class="pager fl"]/a[6]/span[@class="page-numbers next"]').click()
except:
print "All questons loaded"
Update
Found an alternate way to iterate over the questions on Stack Overflow; posting the code. Now I'm generating the XPath based on the div id of each question instead of using the questions
WebElement, which was causing the StaleElementReferenceException
earlier.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
driver = webdriver.Firefox()
driver.get("https://stackoverflow.com/questions")
try:
WebDriverWait(driver, 3).until(EC.presence_of_element_located(
(By.XPATH, '//div[@id="tabs"]/a[@class="youarehere"]')))
print "Page is ready!"
except TimeoutException:
print "Time exceeded"
noofquestionsperpage = driver.find_element_by_xpath(
'//div[@class="page-sizer fr"]/a[@class="page-numbers current"]').text
requirednumberofposts = 30
numberofpagesclick = requirednumberofposts / int(noofquestionsperpage)
if numberofpagesclick > 1:
for i in range(numberofpagesclick):
for j in range(int(noofquestionsperpage)):
xpath = "//div[@id='questions']/div[" + \
str(j + 1) + "]/div[2]/h3/a"
print driver.find_element_by_xpath(xpath).get_attribute('href')
try:
driver.find_element_by_xpath(
'//div[@class="pager fl"]/a[6]/span[@class="page-numbers next"]').click()
except:
print "All questons loaded"