-1

I was not able to download the excel file using Selenium because of the popped up's save window. Could anyone give me some advice on the codes I wrote? I tried many codes template online to fix the problem but all of them did not work out. Thank you so so much for your help.

This is a python snippet for extracting 13F information (asset holding positions) from those huge financial institutions. I have provided the account and password for you in the code snippet if you would like to try it out.

adjust_symbol = "Q4 2020.xlsx"

import selenium
from selenium import webdriver
import time 
import re
from selenium.webdriver.common.action_chains import ActionChains
from pathlib import Path
import time 
import pandas as pd
import numpy as np

pd.set_option('display.max_colwidth', -1)
pd.set_option('max_columns', None)
pd.set_option('display.max_rows', None)



url = ['https://whalewisdom.com/filer/bridgewater-associates-inc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/soros-fund-management-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/yacktman-asset-management-co-il#tabholdings_tab_link',
       'https://whalewisdom.com/filer/citadel-advisors-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/third-point-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/point72-asset-management-lp#tabholdings_tab_link',
       'https://whalewisdom.com/filer/tiger-management-llc-ny#tabholdings_tab_link',
       'https://whalewisdom.com/filer/fisher-asset-management-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/greenwoods-asset-management-ltd#tabholdings_tab_link',
       'https://whalewisdom.com/filer/dorsey-asset-management-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/hillhouse-capital-advisors-ltd#tabholdings_tab_link',
       'https://whalewisdom.com/filer/renaissance-technologies-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/scion-asset-management-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/duquesne-family-office-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/appaloosa-management-lp#tabholdings_tab_link',
       'https://whalewisdom.com/filer/berkshire-hathaway-inc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/perceptive-advisors-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/greenlight-capital-inc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/tiger-global-management-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/baker-bros-advisors-llc#tabholdings_tab_link',
       'https://whalewisdom.com/filer/pointstate-capital-lp#tabholdings_tab_link',
       'https://whalewisdom.com/filer/pershing-square-capital-management-l-p#tabholdings_tab_link',
       'https://whalewisdom.com/filer/gotham-asset-management-llc#tabholdings_tab_link']

for i in url:
    try:
        
        driver = webdriver.Firefox()
        driver.get(i)
        time.sleep(1)
        #time.sleep(20)
        driver.find_element_by_id("lnk-login").click()
        time.sleep(1)
        driver.find_element_by_id("login").send_keys('martin.chow@ipartners.hk')
        time.sleep(1)
        driver.find_element_by_id("password").send_keys('ts262626')
        time.sleep(3)

        #If came across a class, use the following xpath to access that object
        driver.find_element_by_xpath('/html/body/section[1]/header/div/div/div[2]/div[2]/div/div/div/div/div[1]/div/div/div[4]/div/a[1]').click()
        time.sleep(3)
        #time.sleep(20)
        elem = driver.find_element_by_xpath('//*[@id="cmd-export-xlsx"]')

        actions = ActionChains(driver)
        actions.click(elem).perform()
        time.sleep(3)
        #time.sleep(20)
        
    except:
        driver = webdriver.Firefox()
        driver.get(i)
        time.sleep(1)
        #time.sleep(20)
        driver.find_element_by_id("lnk-login").click()
        time.sleep(1)
        driver.find_element_by_id("login").send_keys('martin.chow@ipartners.hk')
        time.sleep(1)
        driver.find_element_by_id("password").send_keys('ts262626')
        time.sleep(3)

        #If came across a class, use the following xpath to access that object
        driver.find_element_by_xpath('/html/body/section[1]/header/div/div/div[2]/div[2]/div/div/div/div/div[1]/div/div/div[4]/div/a[1]').click()
        time.sleep(3)
        #time.sleep(20)
        elem = driver.find_element_by_xpath('//*[@id="cmd-export-xlsx"]')

        actions = ActionChains(driver)
        actions.click(elem).perform()
        time.sleep(3)
        #time.sleep(20)

This is the popped up window I was not able to block it as everytime new browser will be formed and the file is not downloaded directly before I pressed on "OK".

example screenshot

GramThanos
  • 3,572
  • 1
  • 22
  • 34
Soother
  • 19
  • 2

1 Answers1

0

You could do the following to wait for the popup.

    try:
        wait.until(EC.element_to_be_clickable((By.CLASS_NAME,"dfwid-close"))).click()
    except:
        print('No popup')
        continue

Imports

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC

Got it kind of working with:

wait = WebDriverWait(driver, 5)

for i in url:
    try:
        time.sleep(5)
        driver.get(i)
        try:
            wait.until(EC.element_to_be_clickable((By.CLASS_NAME,"dfwid-close"))).click()
        except Exception as e:
            print(str(e))
            print('No popup')
        try:
            print('Trying to log in')
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#lnk-login"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#login"))).send_keys('martin.chow@ipartners.hk')
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#password"))).send_keys('ts262626')
        except Exception as e:
            print(str(e))
            print('Failed log')
        try:
            print('Opening page')
            try:
                wait.until(EC.element_to_be_clickable((By.XPATH,"/html/body/section[1]/header/div/div/div[2]/div[2]/div/div/div/div/div[1]/div/div/div[4]/div/a[1]"))).click()
            except Exception as e:
                print(str(e))
            print('Attempting to download')
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#cmd-export-xlsx"))).click()
            
    except Exception as e:
        print(str(e))
        print('Failed download')
except Exception as e:
    print(str(e))
    pass
Arundeep Chohan
  • 9,779
  • 5
  • 15
  • 32
  • Thank you Arundeep. I might have confused you but the pop up window is actually the open/save window. Please see the following photo. Do you have any idea I would be able to automatically click onto the "OK"? – Soother Mar 18 '21 at 16:43