1

I'm trying to scrape Stack Overflow using Python Selenium.

I get a StaleElementReferenceException for a few elements when I try to print the link (href) of each question.

Below is the exception which is seen

selenium.common.exceptions.StaleElementReferenceException: Message: The element reference is stale. Either the element is no longer attached to the DOM or the page has been refreshed.

I've tried both

WebDriverWait(driver, 3).until(EC.presence_of_element_located(
(By.XPATH, xpath)))

and

driver.implicitly_wait(30)

I have already checked a few related questions and couldn't solve this problem: StaleElementReferenceException: Element is no longer attached to the DOM: Selenium

Code snippet

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By

# Scrape question links from the Stack Overflow questions list with Selenium.
driver = webdriver.Firefox()
driver.get("https://stackoverflow.com/questions")
try:
    # Wait until the "Questions" tab marker exists, i.e. the list has rendered.
    WebDriverWait(driver, 3).until(EC.presence_of_element_located(
        (By.XPATH, '//div[@id="tabs"]/a[@class="youarehere"]')))
    print("Page is ready!")
except TimeoutException:
    print("Time exceeded")

# Page size shown as the "current" link in the pager (e.g. "15", "30", "50").
noofquestionsperpage = driver.find_element_by_xpath(
    '//div[@class="page-sizer fr"]/a[@class="page-numbers current"]').text
requirednumberofposts = 30
# Floor division: same result on Python 2 and 3 (plain "/" yields a float on 3).
numberofpagesclick = requirednumberofposts // int(noofquestionsperpage)

print(numberofpagesclick)
if numberofpagesclick > 1:
    for i in range(numberofpagesclick):
        try:
            # Extract every href in ONE pass, before any navigation: holding
            # WebElement references across a page change is exactly what raises
            # StaleElementReferenceException.  (The original per-anchor
            # WebDriverWait searched the whole document, not the anchor, so it
            # offered no protection at all.)
            anchors = driver.find_elements_by_xpath(
                '//*[@id="questions"]/div[@class="question-summary"]'
                '//div[@class="summary"]/h3/a')
            for href in [a.get_attribute('href') for a in anchors]:
                print(href)
        except Exception as e:  # py2/py3-compatible except syntax
            print(str(e).strip())
        try:
            # Advance to the next results page.
            driver.find_element_by_xpath(
                '//div[@class="pager fl"]/a[6]/span[@class="page-numbers next"]').click()
        except Exception:
            # No "next" link on the last page; a bare except would also have
            # swallowed KeyboardInterrupt/SystemExit.
            print("All questons loaded")

Update

Found an alternate way to iterate over the questions on Stack Overflow; posting the code. Now I'm generating the XPath based on the div index of each question instead of using the question WebElements, which were causing the StaleElementReferenceException earlier.

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By


# Scrape question links by positional XPath so no WebElement reference is
# ever held across a page change (avoids StaleElementReferenceException).
driver = webdriver.Firefox()
driver.get("https://stackoverflow.com/questions")
try:
    # Wait until the "Questions" tab marker exists, i.e. the list has rendered.
    WebDriverWait(driver, 3).until(EC.presence_of_element_located(
        (By.XPATH, '//div[@id="tabs"]/a[@class="youarehere"]')))
    print("Page is ready!")
except TimeoutException:
    print("Time exceeded")

# Page size shown as the "current" link in the pager (e.g. "15", "30", "50").
noofquestionsperpage = driver.find_element_by_xpath(
    '//div[@class="page-sizer fr"]/a[@class="page-numbers current"]').text
requirednumberofposts = 30
# Floor division: same result on Python 2 and 3 (plain "/" yields a float on 3).
numberofpagesclick = requirednumberofposts // int(noofquestionsperpage)

if numberofpagesclick > 1:
    for i in range(numberofpagesclick):
        for j in range(int(noofquestionsperpage)):
            # Build the XPath from the question's 1-based position on the page;
            # the element is looked up fresh on every iteration.
            xpath = "//div[@id='questions']/div[" + \
                str(j + 1) + "]/div[2]/h3/a"
            print(driver.find_element_by_xpath(xpath).get_attribute('href'))
        # BUG FIX: the "next" click must run once PER page, i.e. inside this
        # loop.  In the original it sat outside the loop, so the same first
        # page was scraped on every pass and pagination happened only once.
        try:
            driver.find_element_by_xpath(
                '//div[@class="pager fl"]/a[6]/span[@class="page-numbers next"]').click()
        except Exception:
            # No "next" link on the last page; narrower than the bare except.
            print("All questons loaded")
Community
  • 1
  • 1
Abhishek L
  • 81
  • 2
  • 10

0 Answers0