0

I am using selenium/python to save a series of webpages to pdf. The webpages have a table that is rendered with javascript; I am using "find_element_by_xpath" to check that the pdf icon in the js table has appeared before proceeding with the print. Ideally, I do not want to implement a hard wait/sleep time, as I have thousands of pages to save.

The code seems to work but no pdf is saved.

The code is as follows:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import json

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By

# Print-preview state stored in Chrome's preferences so that "Save as PDF"
# is already the selected destination when the print dialog would open.
settings = {
    "recentDestinations": [{
        "id": "Save as PDF",
        "origin": "local",
        "account": "",
    }],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
}
prefs = {'printing.print_preview_sticky_settings.appState': json.dumps(settings)}

options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', prefs)
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# Kiosk printing confirms the print preview automatically, so window.print()
# goes straight to the preselected destination without user interaction.
options.add_argument('--kiosk-printing')

CHROMEDRIVER_PATH = 'chromedriver.exe'
PDF_ICON_XPATH = "//div[@class='fas fa-file-pdf']"

# Create exactly one browser; a second webdriver.Chrome(...) call would
# launch another Chrome instance and leave the first one orphaned.
driver = webdriver.Chrome(options=options, executable_path=CHROMEDRIVER_PATH)

# Load the page first: the icon can only be looked up once there is a
# document to search.
driver.get("url")

try:
    # Poll for up to 10 seconds until the JavaScript-rendered PDF icon is in
    # the DOM. The wait returns as soon as the icon appears, so no fixed
    # sleep is needed.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, PDF_ICON_XPATH))
    )
except TimeoutException:
    # The icon never showed up; continue and print the page as it is.
    element = None

print(element)

driver.execute_script('window.print();')
# NOTE(review): driver.quit() is not called here; quitting right after
# window.print() may interrupt the save -- confirm, then quit once the PDF
# has been written.
dtx780
  • 29
  • 5

1 Answer

0

This is working

First, I get the document link using driver.find_element(). Then I pass that URL to requests.get(). If response.status_code == 200, the document can be downloaded; otherwise it cannot. Then I write the document content using response.content. Document is the document path, including file_name.extension. Please ignore the other code; it is only there to build the document path.

import os
from selenium import webdriver

import shutil

import requests
from selenium.webdriver.common.by import By

# Scratch directory for browser downloads. It must be defined before the
# cleanup below uses it; the trailing '' keeps a trailing path separator.
temp_down_path = os.path.join(os.getcwd(), 'temp_files', '')

# Start from an empty download directory: remove whatever a previous run left
# behind, or create the directory if it does not exist yet.
if os.path.exists(temp_down_path):
    for entry in os.listdir(temp_down_path):
        entry_path = os.path.join(temp_down_path, entry)
        if os.path.isdir(entry_path):
            shutil.rmtree(entry_path)
        elif os.path.isfile(entry_path):
            os.remove(entry_path)
else:
    os.makedirs(temp_down_path)

options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', {
    "download.default_directory": temp_down_path,  # Change default directory for downloads
    "download.prompt_for_download": False,  # To auto download the file
    "download.directory_upgrade": True,
    "plugins.always_open_pdf_externally": True,  # It will not show PDF directly in chrome
})
driver = webdriver.Chrome(options=options, executable_path='chromedriver.exe')

search_url = 'your site url'
try:
    driver.get(search_url)
    # Take the href of the first <a> element on the page as the document URL.
    link = driver.find_element(By.TAG_NAME, "a").get_attribute('href')
finally:
    # The browser is only needed to discover the link; always release it.
    driver.quit()

# Fetch the document over plain HTTP. The timeout keeps a stalled server from
# hanging the script indefinitely.
response = requests.get(link, timeout=30)

# Join with a real path separator so the file lands inside the working
# directory instead of next to it under a mangled name.
Document = os.path.join(os.getcwd(), 'file_name.pdf')
if response.status_code == 200:
    # The context manager guarantees the file is flushed and closed.
    with open(Document, 'wb') as pdf_file:
        pdf_file.write(response.content)
else:
    print('Document download problem')
Orloff
  • 16
  • 1