Save the pdf using the selenium webdriver in python

Question

I am unable to save the PDF that opens when I click the button. As far as I can tell, the code below runs automatically using the Selenium WebDriver. I want the PDF that opens to be saved automatically by the code, using Selenium in Python. Please help me extend the code below so that it saves the PDF to a folder.

enter code here
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium.webdriver.support.select import Select
import time
# Drive Chrome to the MahaRERA portal, search for one certificate number and
# click the button that opens the PDF. `driver` and `btn` are left bound for
# any code that continues from here.
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)

# Open the "Search Project Details" page. The XPath is kept in one unbroken
# string literal: a raw line break inside the quotes is a SyntaxError and
# would also insert whitespace into the class name so it no longer matches.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((
        By.XPATH,
        "//div[@class='search-pro-details']//a[contains(.,'Search Project Details')]",
    ))
).click()

# Wait for the element with id "Promoter" and click it through JavaScript.
registered_project_radio = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "Promoter")))
driver.execute_script("arguments[0].click();", registered_project_radio)

# Type the certificate number into the "CertiNo" field.
# find_element(By.ID, ...) is used because it exists in both Selenium 3 and 4.
certificate_input = driver.find_element(By.ID, "CertiNo")
certificate_input.send_keys("P50500000005")

# Run the search.
search_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "btnSearch")))
driver.execute_script("arguments[0].click();", search_button)

# Wait for the result row's button (id "btnShow_2017") and click it.
btn = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((
        By.XPATH,
        "//a[@class='btn btn-md btn-success' and @id='btnShow_2017']",
    ))
)

driver.execute_script("arguments[0].click();", btn)

Usually a web browser lets you set what to do with each type of file: download it or open it with an external program. The setting should be somewhere in the options. In another question I saw how to set it through webdriver using `ChromeOptions` or `ChromeProfile`. — furas, Jul 18 '19 at 07:09
If you can get the `href` of this file, then you could try to use `urllib` or `requests` to download it. — furas, Jul 18 '19 at 07:10
What is in this pop-up window? Is this window displaying the PDF? It seems the page uses an `<object>` tag. — furas, Jul 18 '19 at 07:31
Yes, the pop-up window displays the PDF, but it doesn't have an href. — A.D, Jul 18 '19 at 07:35
It doesn't have an href because it holds the whole PDF as text (`base64`-encoded) directly in `data=`. — furas, Jul 18 '19 at 07:41
@furas: I have posted a question related to your answer, because I am still unable to download the PDF. Can you please check the link https://stackoverflow.com/questions/57158681/download-the-file-which-has-stream-url-is-the-chrome-extension-in-the-embed-tag — A.D, Jul 23 '19 at 11:19

score 0 · Accepted Answer · answered Jul 18 '19 at 07:42

After the click, the page adds an <object data="application/pdf;base64,..."> tag whose data= attribute holds the whole PDF as base64-encoded text.

import base64

# Continuation of the script above: `driver` and `btn` come from it.
# Click the button that makes the page add the <object> tag with the PDF.
driver.execute_script("arguments[0].click();", btn)

# Get `data=` of the <object> tag.
# Poll (for up to 20 s) instead of sleeping a fixed time: until() retries
# while find_element raises NoSuchElementException or the attribute is still
# empty, and returns the attribute value as soon as it is available.
data = WebDriverWait(driver, 20).until(
    lambda d: d.find_element(By.TAG_NAME, 'object').get_attribute('data')
)

# Get the text after `base64,` (base64 text itself contains no commas).
text = data.split(',', 1)[1]

# Decode the base64 text to the PDF's content (as bytes).
# Named `pdf_bytes` so the builtin `bytes` type is not shadowed.
pdf_bytes = base64.b64decode(text)

# Save the bytes to a file.
with open('output.pdf', 'wb') as fp:
    fp.write(pdf_bytes)

Now the whole PDF is saved in output.pdf

Tested on Firefox

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium.webdriver.support.select import Select
import time

import base64

# Full script: search the MahaRERA portal for one certificate number, open
# its PDF and save it to output.pdf.
url = 'https://maharerait.mahaonline.gov.in'

driver = webdriver.Firefox()
driver.get(url)

# Open the "Search Project Details" page.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((
        By.XPATH,
        "//div[@class='search-pro-details']//a[contains(.,'Search Project Details')]",
    ))
).click()

# Wait for the element with id "Promoter" and click it through JavaScript.
registered_project_radio = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "Promoter")))
driver.execute_script("arguments[0].click();", registered_project_radio)

# Type the certificate number into the "CertiNo" field.
# find_element(By.ID, ...) is used because it exists in both Selenium 3 and 4.
certificate_input = driver.find_element(By.ID, "CertiNo")
certificate_input.send_keys("P50500000005")

# Run the search.
search_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "btnSearch")))
driver.execute_script("arguments[0].click();", search_button)

# Wait for the result row's button (id "btnShow_2017") and click it; the
# click makes the page add an <object> tag carrying the PDF in `data=`.
btn = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((
        By.XPATH,
        "//a[@class='btn btn-md btn-success' and @id='btnShow_2017']",
    ))
)
driver.execute_script("arguments[0].click();", btn)

# Poll (for up to 20 s) instead of sleeping a fixed time: until() retries
# while find_element raises NoSuchElementException or the attribute is still
# empty, and returns the attribute value as soon as it is available.
data = WebDriverWait(driver, 20).until(
    lambda d: d.find_element(By.TAG_NAME, 'object').get_attribute('data')
)

# Keep only the text after `base64,` (base64 text itself contains no commas).
text = data.split(',', 1)[1]

# Decode to the PDF's raw bytes. Named `pdf_bytes` so the builtin `bytes`
# type is not shadowed.
pdf_bytes = base64.b64decode(text)

with open('output.pdf', 'wb') as fp:
    fp.write(pdf_bytes)

@furas: I have posted a question related to your answer, because I am still unable to download the PDF. Can you please check the link https://stackoverflow.com/questions/57158681/download-the-file-which-has-stream-url-is-the-chrome-extension-in-the-embed-tag — A.D, Jul 23 '19 at 11:21

Save the pdf using the selenium webdriver in python

1 Answers1

Linked