I'm working on a parser platform. I need to download files and save them directly to an FTP server. For this I need a file-like object, because I don't want to save junk temporary files on disk.
I need to use Selenium specifically.
For example: I need to download this document, but for this I have to enter the data and accept the check.
This code accepts the notification, fills in the form and saves the cookies:
import os
import pickle
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
def get_file(driver: webdriver.Chrome, url: str):
    """Open *url*, accept the notice, submit the agreement form and save cookies.

    The browser session's cookies are pickled to ``cookies.pkl`` in the
    current working directory. Nothing is returned.

    Args:
        driver: Chrome driver used to load and interact with the page.
        url: Address of the page that holds the agreement form.

    Raises:
        selenium.common.exceptions.TimeoutException: If an awaited element
            does not reach the expected state within its timeout.
        selenium.common.exceptions.NoSuchElementException: If one of the
            form fields is missing from the page.
    """
    submit_locator = (By.XPATH, '//*[@id="doc_agreement"]/div[4]/input[1]')
    form_values = (
        ('contact_name', 'Company'),
        ('contact_title', 'People'),
        ('company', 'cb'),
        ('country', 'some'),
    )

    driver.set_page_load_timeout(40)
    driver.get(url=url)
    time.sleep(2)

    # accept notify
    # Wait on a locator rather than a pre-fetched element, so a button that
    # renders late is still found. ``until`` returns the element or raises.
    WebDriverWait(driver, 5).until(
        ec.element_to_be_clickable((By.ID, 'ccc-notify-accept'))
    ).click()

    # Enter some data
    WebDriverWait(driver, 2).until(
        ec.presence_of_element_located((By.ID, 'agreement_form'))
    )
    for field_id, value in form_values:
        driver.find_element(By.ID, field_id).send_keys(value)

    # accept form
    WebDriverWait(driver, 5).until(ec.presence_of_element_located(submit_locator))
    WebDriverWait(driver, 5).until(
        ec.element_to_be_clickable(submit_locator)
    ).click()
    time.sleep(2)

    # Save cookie
    # The context manager guarantees the file is flushed and closed.
    with open('cookies.pkl', 'wb') as cookie_file:
        pickle.dump(driver.get_cookies(), cookie_file)

    # NOTE(review): presumably gives the browser time to finish the
    # download triggered by the form -- confirm before shortening.
    time.sleep(10)
On the web I only found a way to download a document via Selenium to a local directory. This method can only save the file to a local directory:
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
def downloadDriver():
    """Build and return a Chrome driver that downloads into ``./temp``.

    The download directory is ``temp`` under the current working directory.
    The preferences passed to Chrome disable the download prompt and ask
    for PDFs to be opened externally instead of in the built-in viewer.

    Returns:
        The newly started ``webdriver.Chrome`` instance.
    """
    download_dir = os.path.join(os.getcwd(), "temp")

    prefs = dict()
    prefs.update({"download.prompt_for_download": False})
    prefs.update({"plugins.always_open_pdf_externally": True})
    prefs.update({"download.open_pdf_in_system_reader": False})
    prefs.update({"profile.default_content_settings.popups": 0})
    prefs.update({"download.default_directory": download_dir})

    chrome_options = webdriver.ChromeOptions()
    for argument in ('window-size=1920x1080', "disable-gpu"):
        chrome_options.add_argument(argument)
    chrome_options.add_experimental_option("prefs", prefs)

    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)
I tried to get the file object via urllib.request.urlopen(), but it throws a 403 error.
I also tried passing a cookie from Selenium to urllib, but this didn't solve the problem.
How can I get a stream, a file-like object, or bytes (anything that avoids writing to disk)?