I've seen solutions to this on other posts (mostly suggesting a longer waiting time), but have tried that and haven't had success.
Here's the error I'm getting:
Traceback (most recent call last):
File "LobbyistsPrep.py", line 126, in <module>
the_download = get_file(year, report, download_dir)
File "LobbyistsPrep.py", line 28, in get_file
Year.select_by_visible_text(year_text)
File "C:\Python27\lib\site-packages\selenium\webdriver\support\select.py", lin
e 120, in select_by_visible_text
self._setSelected(opt)
File "C:\Python27\lib\site-packages\selenium\webdriver\support\select.py", lin
e 212, in _setSelected
option.click()
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webelement.py",
line 80, in click
self._execute(Command.CLICK_ELEMENT)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webelement.py",
line 501, in _execute
return self._parent.execute(command, params)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", l
ine 308, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py"
, line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale elemen
t reference: element is not attached to the page document
(Session info: chrome=65.0.3325.181)
(Driver info: chromedriver=2.33.506120 (e3e53437346286c0bc2d2dc9aa4915ba81d902
3f),platform=Windows NT 6.1.7601 SP1 x86_64)
Here's the relevant code:
def get_file(year_text, category, download_dir):
# Store a list of files in the Downloads directory.
# We will use this later to determine the filename of the the CSV we downloaded.
downloads_before = os.listdir( download_dir )
# Change the Year dropdown
Year = Select(driver.find_element_by_name('ctl00$ctl00$ContentPlaceHolder$ContentPlaceHolder1$ddYear'))
Year.select_by_visible_text(year_text)
time.sleep(30)
# Change the Expenditure Type dropdown
Type = Select(driver.find_element_by_name('ctl00$ctl00$ContentPlaceHolder$ContentPlaceHolder1$ddExpType'))
Type.select_by_visible_text(category)
time.sleep(30)
# Change the Report Month dropdown
Month = Select(driver.find_element_by_name('ctl00$ctl00$ContentPlaceHolder$ContentPlaceHolder1$ddMonth'))
Month.select_by_visible_text('-- All Available --')
time.sleep(30)
# Click the Export to CSV button (downloads the CSV file)
driver.find_element_by_name('ctl00$ctl00$ContentPlaceHolder$ContentPlaceHolder1$btnExport').click()
time.sleep(30)
# Now that we have downloaded the file, lets check the Downloads directory again and compare.
downloads_after = os.listdir( download_dir )
downloads_change = set(downloads_after) - set(downloads_before)
# If there is only one difference, then that file is the one we downloaded.
if len(downloads_change) == 1:
file_name = downloads_change.pop()
file_path = download_dir + file_name
return file_path
# Otherwise, something went wrong: Either the number of files changed by MORE than one, or NOTHING was downloaded.
else:
return False
driver.get('http://mec.mo.gov/mec/Lobbying/Lob_ExpCSV.aspx')
time.sleep(30)
for report in reports_wanted:
for year in years_wanted:
the_download = get_file(year, report, download_dir)
if the_download:
if report == 'Group':
print 'Downloaded ' + the_download + '. Adding to GROUP. Report:\t' + year + '\t' + report
group_files.append(the_download)
else:
print 'Downloaded ' + the_download + '. Adding to INDIV. Report:\t' + year + '\t' + report
files.append(the_download)
else:
print 'PROBLEM DOWNLOADING: \t' + year + '\t' + report
Our time.sleep used to be time.sleep(2) - I've tried changing it to 30, but that doesn't help, either.
I'm still pretty new to de-bugging scrapers, and this one wasn't built by me, so please be gentle. Thanks in advance.