10

I want to save a web page as a PDF file. That part works fine, but the file should be saved to a specific path; instead it is just saved to my default download directory.

import json
from selenium import webdriver

# Print-preview settings that pre-select "Save as PDF" as the print destination.
appState = {
    "recentDestinations": [
        {
            "id": "Save as PDF",
            "origin": "local"
        }
    ],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
    # NOTE(review): the two download.* keys below live inside appState, so they
    # end up inside the JSON string stored under the printing preference rather
    # than as top-level Chrome prefs -- presumably why the directory is ignored.
    'download.default_directory': 'C:\\Users\\Oli\\Google Drive',
    "download.directory_upgrade": True
}

# The whole appState dict is serialised to JSON and stored under one pref key.
profile = {'printing.print_preview_sticky_settings.appState': json.dumps(appState)}

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option('prefs', profile)
chrome_options.add_argument('--kiosk-printing')

driver = webdriver.Chrome(chrome_options=chrome_options)
driver.get('https://www.google.com/')
# Trigger printing of the loaded page from JavaScript.
driver.execute_script('window.print();')

By the way, does anyone have an idea how to save the file with a specific name?

Oliver Weidner
  • 305
  • 1
  • 3
  • 11
  • Selenium uses the page title as the PDF filename, so just change the page title to the name you want to give your PDF before printing: `driver.execute_script('document.title="{}";'.format(YOUR_PDF_NAME)); driver.execute_script('window.print();')` – iMath May 01 '21 at 09:34

4 Answers

11

The download.default_directory setting is only for downloaded content. Chrome treats files saved on the page differently. To change the default folder for a printout of the page, simply set the savefile.default_directory value instead.

So the full example to print to pdf for a custom location:

import json
from selenium import webdriver

# Print-preview state that pre-selects the local "Save as PDF" destination.
pdf_destination = {"id": "Save as PDF", "origin": "local", "account": ""}
appState = {
    "recentDestinations": [pdf_destination],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
}

# Chrome preferences: the serialised print-preview state plus the directory
# used for files saved from the page (as opposed to regular downloads).
profile = {}
profile['printing.print_preview_sticky_settings.appState'] = json.dumps(appState)
profile['savefile.default_directory'] = 'path/to/dir/'

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option('prefs', profile)
chrome_options.add_argument('--kiosk-printing')

# Open the page and print it from JavaScript.
driver = webdriver.Chrome(options=chrome_options)
driver.get(url)
driver.execute_script('window.print();')
kayoz
  • 1,104
  • 12
  • 16
2

download.default_directory should be added not to appState but to the "prefs" dict passed to add_experimental_option

like:

# Download settings go directly into the "prefs" experimental option.
download_prefs = {}
download_prefs['download.default_directory'] = 'C:\\Users\\Oli\\Google Drive'
download_prefs['download.directory_upgrade'] = True
chrome_options.add_experimental_option("prefs", download_prefs)

But in your case it wouldn't help, as this option sets the location for 'file -> save as', and you need 'print -> save as'.

As a workaround you could use --print-to-pdf argument for Chrome (no need to run Chrome Webdriver, but Chrome itself in a headless mode)

import os

# Target folder (with trailing separator), output file name and page URL.
path_to_file = 'C:\\Users\\Oli\\Google Drive\\'
name_of_file = '1.pdf'
page_to_open = 'http://example.com'

# The PDF path is wrapped in double quotes inside the command line.
pdf_target = path_to_file + name_of_file
command_to_run = f'start chrome --headless --print-to-pdf="{pdf_target}" {page_to_open}'
print(f'launch:{command_to_run}')

# Hand the command line to the shell.
os.popen(command_to_run)

Be careful as it's running in silent mode, no warning messages if file is not created (for example if no such directory, or no admin rights to C:\Users, or no such webpage).

And you could always test right in the command line (cmd) like:

start chrome --headless --print-to-pdf="C:\\temp\\1.pdf" http://example.com
Litvin
  • 330
  • 1
  • 9
  • Thank you for your explanation. Is it possible to control Chrome without selenium like the selenium browser? Because the link i get to the PDF-file is a temporary link and i can't open a new chrome browser to download this pdf file. – Oliver Weidner Feb 11 '19 at 17:17
  • No, you need Selenium as you need some actions before the saving. So as a workaround you could work with the files on your drive. Look at the example in one more answer. – Litvin Feb 13 '19 at 18:10
2

The key is to use:

# Issue the DevTools "Page.printToPDF" command with backgrounds enabled.
print_params = {"printBackground": True}
pdf = webdriver.execute_cdp_cmd("Page.printToPDF", print_params)

Then you can write the pdf to wherever you want. Here is a full example:

import base64
from typing import Optional
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def svg_to_pdf_chromium(filename: Path,  out_dir: Optional[Path] = None):
    """Convert an SVG on disk to a PDF using Selenium and Chromedriver.

    The SVG is opened in headless Chrome, rendered through the DevTools
    ``Page.printToPDF`` command, and the decoded result is written to
    ``<out_dir>/<stem of filename>.pdf``.

    Args:
        filename: Path of the SVG file to convert.
        out_dir: Directory that receives the PDF. Defaults to the directory
            containing ``filename``.
    """
    if out_dir is None:
        out_dir = filename.parent

    service = Service(ChromeDriverManager().install())

    # The options object has to be created before arguments can be added to it.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--kiosk-printing')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument("--no-sandbox")
    # Chrome expects the window size as "width,height".
    chrome_options.add_argument("--window-size=2000,2000")
    chrome_options.add_argument('--disable-dev-shm-usage')

    webdriver_chrome = webdriver.Chrome(
        service=service, options=chrome_options)
    try:
        # as_uri() needs an absolute path and yields a valid file:// URL on
        # every platform (including Windows drive letters and spaces).
        webdriver_chrome.get(filename.resolve().as_uri())
        pdf = webdriver_chrome.execute_cdp_cmd(
            "Page.printToPDF", {
                "printBackground": True,
                "landscape": True,
                "displayHeaderFooter": False,
                "scale": 0.75,
            })
    finally:
        # quit() ends the whole session (browser and driver process), even
        # when navigation or printing raised.
        webdriver_chrome.quit()

    # The command returns the PDF base64-encoded under the 'data' key.
    with open(out_dir / f'{filename.stem}.pdf', "wb") as f:
        f.write(base64.b64decode(pdf['data']))
        
# Example call. OUTPUT is not defined in this snippet -- presumably a
# pathlib.Path pointing at a base output directory; define it before running.
svg_to_pdf_chromium(OUTPUT / "svg" / "mysvg.svg")

This also makes it possible to remove the ugly wait time.

Options available with Page.printToPDF are listed in the Chrome DevTools docs.

Alex
  • 2,784
  • 2
  • 32
  • 46
1

One more workaround. Just save the file as is and then move and rename it as needed.

Idea of the code below: check the modification time of every PDF file in the download directory and compare it with the current time. If the time delta is less than some value (let's say 15 seconds), this is presumably the right file, so move/rename it to where you need it.

import os
import time
import json
import shutil


def move_recent_pdf(download_path, new_path, new_filename, max_age=15, now=None):
    """Move the most recently modified PDF under ``download_path``.

    Walks ``download_path`` (including sub-directories), looks at every file
    whose name ends in ``.pdf`` (case-insensitive) and picks the newest one
    whose modification time is less than ``max_age`` seconds before ``now``.
    That file is moved to ``os.path.join(new_path, new_filename)``.

    Args:
        download_path: Directory the browser saves files to.
        new_path: Directory the PDF should end up in.
        new_filename: File name to give the moved PDF.
        max_age: Maximum age in seconds for a file to count as "just saved".
        now: Reference timestamp; defaults to ``time.time()``.

    Returns:
        The destination path if a file was moved, otherwise ``None``.
    """
    if now is None:
        now = time.time()

    newest_path = None
    newest_mtime = None
    for dirpath, _dirnames, filenames in os.walk(download_path):
        for filename in filenames:
            if not filename.lower().endswith('.pdf'):
                continue
            # Join with dirpath, not download_path: os.walk also yields
            # files that live in sub-directories.
            full_path = os.path.join(dirpath, filename)
            try:
                timestamp_file = os.path.getmtime(full_path)
            except OSError:
                # File disappeared between listing and stat; ignore it.
                continue
            if (now - timestamp_file) >= max_age:
                continue
            if newest_mtime is None or timestamp_file > newest_mtime:
                newest_path, newest_mtime = full_path, timestamp_file

    if newest_path is None:
        return None

    full_new_path = os.path.join(new_path, new_filename)
    # shutil.move also works when source and target are on different drives,
    # where os.rename fails.
    shutil.move(newest_path, full_new_path)
    print(newest_path+' is moved to '+full_new_path)
    return full_new_path


def main():
    """Print http://example.com/ to PDF with Chrome, then move and rename it."""
    # Imported here so that move_recent_pdf can be imported without Selenium.
    from selenium import webdriver

    appState = {
        "recentDestinations": [
            {
                "id": "Save as PDF",
                "origin": "local"
            }
        ],
        "selectedDestinationId": "Save as PDF",
        "version": 2
    }

    profile = {'printing.print_preview_sticky_settings.appState': json.dumps(appState)}

    download_path = r'C:\Users\Oli\Downloads' # Path where browser save files
    new_path = r'C:\Users\Oli\Google Drive' # Path where to move file
    new_filename = 'new_name.pdf' # Set the name of file

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option('prefs', profile)
    chrome_options.add_argument('--kiosk-printing')
    driver = webdriver.Chrome(options=chrome_options)

    driver.get('http://example.com/')
    driver.execute_script('window.print();')

    # NOTE: the PDF may not be fully written yet when window.print() returns;
    # add a wait or retry here if no file is found.
    if move_recent_pdf(download_path, new_path, new_filename) is None:
        print('No recently saved PDF found in '+download_path)


if __name__ == '__main__':
    main()

Note: it's just an example. You need to think through all your actions. To make the code stable you might need to add some exception handling. It is better to move this additional code into a function. And so on.

Litvin
  • 330
  • 1
  • 9