0

I wrote a script to find the download link through a series of clicks: first on the settings gear icon, then on the "Export data" tab, and finally on the "click here to download data" link.

However, when I click on the final link, it does not download the data to my specified default directory.

**Ideally I would like to download the data directly into a variable, but I couldn't even figure out why the general download wasn't working.**

I have tried getting the href from the download link and opening a new tab using that URL, but it still gives me nothing.

# Page the script opens; the query string carries the `appid` and `sheet`
# identifiers.
URL = (
    'https://edap.epa.gov/public/single/'
    '?appid=73b2b6a5-70c6-4820-b3fa-186ac094f10d'
    '&sheet=1e76b65b-dd6c-41fd-9143-ba44874e1f9d'
)
# NOTE(review): not referenced by the code visible here; presumably a wait
# time in seconds -- confirm before relying on it.
DELAY = 10



def init_driver(url):
    """Start a headless Chrome session that can download files and open *url*.

    Args:
        url: Address of the page to load once the browser has started.

    Returns:
        The Chrome WebDriver instance, with a 20 second implicit wait set
        and *url* already loaded.
    """
    path = '/Users/X/Applications/chromedriver'
    # Chrome needs an absolute download path; the previous value was missing
    # its leading slash.
    download_dir = '/Users/X/Python/data_scraper/epa_data'

    options = webdriver.chrome.options.Options()
    options.add_argument("--headless")
    # The download directory is a Chrome *preference*, not a command-line
    # switch, so it has to be passed through "prefs" rather than add_argument.
    options.add_experimental_option("prefs", {
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
    })

    driver = webdriver.Chrome(options=options, executable_path=path)
    # Headless Chrome refuses downloads unless they are explicitly allowed
    # through the DevTools protocol.
    driver.execute_cdp_cmd(
        "Page.setDownloadBehavior",
        {"behavior": "allow", "downloadPath": download_dir},
    )
    driver.implicitly_wait(20)
    driver.get(url)
    return driver



def find_settings(web_driver):
    """Click the settings gear (cogwheel) icon on the loaded page.

    Waits up to 10 seconds for an element with class ``ng-scope`` to become
    visible, then looks up the gear icon by CSS selector, prints it and
    clicks it.

    Any exception is caught rather than propagated: the exception and the
    current page source are printed and the function returns normally.

    Args:
        web_driver: The WebDriver session showing the page.
    """
    try:
        page_ready = EC.visibility_of_element_located((By.CLASS_NAME, "ng-scope"))
        WebDriverWait(web_driver, 10).until(page_ready)

        gear_icon = web_driver.find_element_by_css_selector(
            "span.cl-icon.cl-icon--cogwheel.cl-icon-right-align"
        )
        print(gear_icon)
        gear_icon.click()
    except Exception as e:
        # Best effort: report the failure and dump the page for debugging.
        print(e)
        print(web_driver.page_source)


def get_settings_list(web_driver):
    """Open the settings menu and collect its entries keyed by their text.

    Calls ``find_settings`` to click the gear icon, sleeps 8 seconds, then
    gathers every element matching ``span.lui-list__text.ng-binding``. Each
    entry's text is printed as it is collected.

    Any exception raised while collecting is printed and swallowed, so the
    returned dict may be empty or partially filled.

    Args:
        web_driver: The WebDriver session showing the page.

    Returns:
        dict mapping each entry's visible text to its element.
    """
    find_settings(web_driver)

    entries_by_label = {}
    try:
        # Fixed pause before querying the menu entries.
        time.sleep(8)
        print("got menu_items")
        found = web_driver.find_elements_by_css_selector(
            "span.lui-list__text.ng-binding"
        )
        for entry in found:
            print(entry.text)
            entries_by_label[entry.text] = entry
    except Exception as e:
        print(e)

    return entries_by_label


def get_export_data(web_driver):
    """Run the "Export data" flow and request the generated download.

    Opens the settings menu, clicks the "Export data" entry, opens a second
    browser window, clicks the download link in the main window, then loads
    the link's URL in the second window and prints that window's page source.

    Args:
        web_driver: The WebDriver session showing the page. All browser
            interaction goes through this argument, not a module-level
            global.

    Raises:
        KeyError: If the settings menu has no "Export data" entry.
    """
    from urllib.parse import urljoin

    menu_items = get_settings_list(web_driver)
    print(menu_items)
    export_data = menu_items['Export data']
    export_data.click()

    web_driver.execute_script("window.open();")
    handles = web_driver.window_handles
    print(handles)
    main_window = handles[0]
    temp_window = handles[1]
    # switch_to_window() is deprecated; switch_to.window() is the supported form.
    web_driver.switch_to.window(main_window)

    # Fixed pause before looking for the download link.
    time.sleep(8)

    download_data = web_driver.find_element_by_xpath("//a[contains(text(), 'Click here to download your data file.')]")
    download_href = download_data.get_attribute('href')

    print(download_href)
    download_data.click()
    web_driver.switch_to.window(temp_window)
    # The href is normally already absolute, so blindly prefixing the host
    # would build a malformed URL. urljoin leaves absolute URLs untouched and
    # resolves relative ones against the site root.
    web_driver.get(urljoin("https://edap.epa.gov", download_href))
    print(web_driver.page_source)



# Script entry point: open the page at URL in a new browser session, then run
# the export-and-download flow against it.
# NOTE(review): the session is never closed with driver.quit(); confirm
# whether the browser must stay alive until the download finishes before
# adding it.
driver = init_driver(URL)
#get_settings_list(driver)
get_export_data(driver)

I would like this code to emulate the manual actions of clicking the settings gear icon, then "Export data", then the download link, which downloads the data as a CSV (ideally I want to skip the file and load it into a pandas DataFrame, but that's an issue for another time).

Rman n
  • 45
  • 6
  • after you get the URL for the download, you can use requests to grab the file and drop it onto your computer, which can be simpler than dealing with selenium to download – AndrewH Jun 21 '19 at 20:59

1 Answer

0

For security reasons, Chrome will not allow downloads while running headless. Here's a link to some more information and a possible workaround.

Unless you need to use Chrome, Firefox will allow downloads while headless - albeit with some tweaking.

Chris B.
  • 455
  • 7
  • 19