0

I'm trying to download every single lesson from this page, but I'm starting small. Even so, I can't download a single lesson, let alone iterate over all of them. The browser just closes after getting to the page. I copied the XPath from Mozilla Firefox and it seems okay. Could you guys help me? Also, what would be the best way to iterate so that I navigate through all of the lesson pages?

from selenium.webdriver.common.keys import Keys

# Open the lesson overview page in a new Firefox session.
# NOTE(review): the imports that define webdriver, WebDriverWait, EC and By
# are not shown in this snippet.
driver = webdriver.Firefox()

driver.get("https://www.dw.com/en/learn-german/deutsch-warum-nicht-series-3/s-2552")

try:
    # Wait up to 10 s for the first <h2> (absolute XPath copied from the
    # browser) to be present in the DOM, then click it. Presence alone does
    # not mean the element is visible or clickable.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="bodyContent"]/div[1]/div/div/div[2]/a/h2'))
    )
    element.click()
    
    # Same pattern for a second <h2>, looked up after the first click.
    element2 = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="bodyContent"]/div[1]/div[5]/div[2]/a/h2'))
    )
    element2.click()
    
# Bare except: any exception raised above (a wait timeout, a failed click, ...)
# lands here and the browser is closed without the error being reported.
except:
    driver.quit()
luka1156
  • 181
  • 1
  • 8

1 Answers1

0

The page behaves like this: clicking a lesson redirects you to a new page within the same tab, where you can click the download link. You then have to go back to the first page and repeat the same steps for each of the remaining lessons. Please see the sample code below:

Sample code :

# Download every lesson from the overview page with Chrome.
# Per lesson: open it from the overview, click its download link, then go
# back to the overview. Everything happens in the same tab.
driver = webdriver.Chrome(driver_path)  # driver_path: location of the chromedriver executable
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.dw.com/en/learn-german/deutsch-warum-nicht-series-3/s-2552")

# Best effort: accept the cookie banner if it shows up. Only Selenium errors
# (timeout, intercepted click, ...) are ignored, so Ctrl+C and programming
# errors still surface.
try:
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.cookie__btn.cookie__btn--ok"))).click()
except WebDriverException:
    pass

# Every <h2> on the overview page is treated as one lesson entry.
lesson_count = len(driver.find_elements(By.TAG_NAME, "h2"))

# XPath positions are 1-based, hence range(1, lesson_count + 1).
for position in range(1, lesson_count + 1):
    # Look the heading up again on every pass: the page was reloaded by the
    # previous back-navigation, so older element references are stale.
    lesson = wait.until(EC.element_to_be_clickable((By.XPATH, f"(//h2)[{position}]")))
    lesson.click()
    time.sleep(2)
    # On the lesson page, click the link wrapping an <h2> whose href is absolute.
    wait.until(EC.element_to_be_clickable((By.XPATH, "//h2/parent::a[starts-with(@href,'https')]"))).click()
    time.sleep(2)
    # Return to the overview page for the next lesson.
    driver.execute_script("window.history.go(-1)")

Imports :

import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

Update :

To run this in Firefox, you need a Firefox profile that lets files download without prompting:

import time

from selenium.webdriver import DesiredCapabilities, FirefoxProfile
from selenium.webdriver.common.keys import Keys

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

from selenium.common.exceptions import WebDriverException

# Firefox profile configured so that downloads are saved to disk without any
# dialog or download-manager window getting in the way of the automation.
profile = FirefoxProfile()
profile.set_preference("browser.download.panel.shown", False)
profile.set_preference("browser.helperApps.neverAsk.openFile","text/csv,application/vnd.ms-excel")
# MIME types that are saved directly instead of triggering the "open with / save" prompt.
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/msword, application/csv, application/ris, text/csv, image/png, application/pdf, text/html, text/plain, application/zip, application/x-zip, application/x-zip-compressed, application/download, application/octet-stream")
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.manager.alertOnEXEOpen", False)
profile.set_preference("browser.download.manager.focusWhenStarting", False)
# folderList = 2 makes Firefox use the custom directory set in browser.download.dir.
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.useDownloadDir", True)
profile.set_preference("browser.helperApps.alwaysAsk.force", False)
profile.set_preference("browser.download.manager.closeWhenDone", True)
profile.set_preference("browser.download.manager.showAlertOnComplete", False)
profile.set_preference("browser.download.manager.useWindow", False)
profile.set_preference("services.sync.prefs.sync.browser.download.manager.showWhenStarting", False)
# Disable the built-in PDF viewer so PDFs are downloaded rather than displayed.
profile.set_preference("pdfjs.disabled", True)
# Adjust to your own download folder.
profile.set_preference("browser.download.dir", "C:\\Users\\****\\***\\Desktop\\Automation")
# Adjust executable_path to the full path of your geckodriver executable.
driver = webdriver.Firefox(firefox_profile = profile, executable_path = "geckodriver.exe full file path")


driver.maximize_window()
# Explicit waits only: Selenium's documentation warns that mixing implicit and
# explicit waits leads to unpredictable wait times.
wait = WebDriverWait(driver, 20)


driver.get("https://www.dw.com/en/learn-german/deutsch-warum-nicht-series-3/s-2552")

# Best effort: accept the cookie banner if it shows up. Only Selenium errors
# (timeout, intercepted click, ...) are ignored, so Ctrl+C and programming
# errors still surface.
try:
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.cookie__btn.cookie__btn--ok"))).click()
except WebDriverException:
    pass

# Every <h2> on the overview page is treated as one lesson entry.
lesson_count = len(driver.find_elements(By.TAG_NAME, "h2"))

for lesson_index in range(lesson_count):
    # Collect the headings again on every pass: the page was reloaded by the
    # previous back-navigation, so older element references are stale.
    headings = wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//h2")))
    headings[lesson_index].click()
    time.sleep(2)
    # On the lesson page, find the link wrapping an <h2> whose href is absolute.
    download_link = wait.until(EC.element_to_be_clickable((By.XPATH, "//h2/parent::a[starts-with(@href,'https')]")))
    # Scroll to the link and click it through JavaScript instead of a native click.
    driver.execute_script("arguments[0].scrollIntoView(true);", download_link)
    driver.execute_script("arguments[0].click();", download_link)
    time.sleep(2)
    # Return to the overview page for the next lesson.
    driver.execute_script("window.history.go(-1)")
cruisepandey
  • 28,520
  • 6
  • 20
  • 38
  • I switched to the Firefox drive and it didn't download the .pdf. With this message "ElementNotInteractableException: Message: Element could not be scrolled into view". But it seems that it is well into view. – luka1156 Sep 28 '21 at 18:20
  • I did remove the comment to the part of the code that is supposed to roll but it doesn't work also. – luka1156 Sep 28 '21 at 18:25
  • I tried it in Chrome and it worked. In Firefox, whenever a download occurs you typically see a download pop-up; you can handle that using Firefox options, please see here: https://stackoverflow.com/questions/25251583/downloading-file-to-specified-location-with-selenium-and-python — the logic should remain the same irrespective of the browser. – cruisepandey Sep 28 '21 at 18:32
  • 1
    I'll try using Chrome. Thanks for the answers mate! – luka1156 Sep 28 '21 at 18:54
  • @luka1156 : I have implemented this to run on firefox also, you'd have to make changes in two places first is `profile.set_preference("browser.download.dir", "C:\\Users\\****\\***\\Desktop\\Automation")` and second here `driver = webdriver.Firefox(firefox_profile = profile, executable_path = "geckodriver.exe full file path")` – cruisepandey Sep 29 '21 at 07:20