I was not able to download the Excel file using Selenium because of the pop-up "save file" window. Could anyone give me some advice on the code I wrote? I tried many code templates found online to fix the problem, but none of them worked. Thank you very much for your help.
This is a Python snippet for extracting 13F information (asset holding positions) for large financial institutions. I have included the account and password in the code snippet in case you would like to try it out.
# NOTE(review): not referenced anywhere in the visible script; presumably the
# quarter suffix of the exported workbook's file name -- TODO confirm.
adjust_symbol = "Q4 2020.xlsx"
import selenium
from selenium import webdriver
import time
import re
from selenium.webdriver.common.action_chains import ActionChains
from pathlib import Path
import time
import pandas as pd
import numpy as np
# Print DataFrames in full: no truncation of cell text, columns, or rows.
# - `None` (not -1) is the "no limit" value for max_colwidth; passing a negative
#   integer has been deprecated since pandas 1.0 and is rejected by later releases.
# - Option names are fully qualified because a bare 'max_columns' pattern can
#   match more than one registered option, which makes set_option raise.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Filer identifiers as they appear in the whalewisdom.com page paths. Every
# entry of `url` is the same prefix + one of these + the same fragment, so the
# list is assembled from the slugs instead of being spelled out in full.
_FILER_SLUGS = (
    'bridgewater-associates-inc',
    'soros-fund-management-llc',
    'yacktman-asset-management-co-il',
    'citadel-advisors-llc',
    'third-point-llc',
    'point72-asset-management-lp',
    'tiger-management-llc-ny',
    'fisher-asset-management-llc',
    'greenwoods-asset-management-ltd',
    'dorsey-asset-management-llc',
    'hillhouse-capital-advisors-ltd',
    'renaissance-technologies-llc',
    'scion-asset-management-llc',
    'duquesne-family-office-llc',
    'appaloosa-management-lp',
    'berkshire-hathaway-inc',
    'perceptive-advisors-llc',
    'greenlight-capital-inc',
    'tiger-global-management-llc',
    'baker-bros-advisors-llc',
    'pointstate-capital-lp',
    'pershing-square-capital-management-l-p',
    'gotham-asset-management-llc',
)

# Pages to visit, in the order listed above.
url = [
    'https://whalewisdom.com/filer/' + slug + '#tabholdings_tab_link'
    for slug in _FILER_SLUGS
]
import os  # only this section needs it; kept local so the import block is untouched

# NOTE(security): the credentials below are hard-coded in the script. They are
# kept as fallbacks so the script behaves as before, but they should be supplied
# through the environment (WHALEWISDOM_LOGIN / WHALEWISDOM_PASSWORD) and the
# literals removed before this file is shared or committed.
_LOGIN_EMAIL = os.environ.get('WHALEWISDOM_LOGIN', 'martin.chow@ipartners.hk')
_LOGIN_PASSWORD = os.environ.get('WHALEWISDOM_PASSWORD', 'ts262626')

# Dedicated folder for the exported workbooks. Using a folder of our own means
# a leftover "*.part" file from unrelated downloads cannot stall the wait below.
_DOWNLOAD_DIR = Path.cwd() / 'whalewisdom_downloads'

# Content types Firefox should save without showing the "save file" dialog.
# NOTE(review): the Content-Type the site actually sends is not visible from
# this script, so the usual spreadsheet types are listed -- confirm and trim.
_AUTO_SAVE_MIME_TYPES = ','.join([
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.ms-excel',
    'application/octet-stream',
])

# Element clicked once both credential fields are filled in; presumably the
# login form's submit control -- TODO confirm. The absolute path is brittle
# and will break if the page layout changes.
_AFTER_CREDENTIALS_XPATH = '/html/body/section[1]/header/div/div/div[2]/div[2]/div/div/div/div/div[1]/div/div/div[4]/div/a[1]'


def _new_driver():
    """Start Firefox configured to save downloads silently into _DOWNLOAD_DIR."""
    options = webdriver.FirefoxOptions()
    # folderList=2 means "use the directory given in browser.download.dir".
    options.set_preference('browser.download.folderList', 2)
    options.set_preference('browser.download.dir', str(_DOWNLOAD_DIR))
    options.set_preference('browser.download.manager.showWhenStarting', False)
    # This is the preference that suppresses the pop-up save dialog.
    options.set_preference('browser.helperApps.neverAsk.saveToDisk', _AUTO_SAVE_MIME_TYPES)
    return webdriver.Firefox(options=options)


def _wait_for_downloads(folder, timeout=60.0, poll=0.5):
    """Block until *folder* holds no Firefox "*.part" files.

    Returns True when no partial file remains, False if *timeout* seconds
    elapse first.
    """
    deadline = time.monotonic() + timeout
    while any(folder.glob('*.part')):
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll)
    return True


def _export_holdings(page_url):
    """Open *page_url*, log in, click the XLSX export link and close the browser.

    Any Selenium error propagates to the caller; the browser is closed either way.
    """
    driver = _new_driver()
    try:
        driver.get(page_url)
        time.sleep(1)
        # Locator strategies are passed as plain strings ('id', 'xpath'): this
        # form works on Selenium 3 and 4, whereas the find_element_by_* helpers
        # were removed in Selenium 4.3.
        driver.find_element('id', 'lnk-login').click()
        time.sleep(1)
        driver.find_element('id', 'login').send_keys(_LOGIN_EMAIL)
        time.sleep(1)
        driver.find_element('id', 'password').send_keys(_LOGIN_PASSWORD)
        time.sleep(3)
        driver.find_element('xpath', _AFTER_CREDENTIALS_XPATH).click()
        time.sleep(3)
        export_link = driver.find_element('xpath', '//*[@id="cmd-export-xlsx"]')
        ActionChains(driver).click(export_link).perform()
        # Give the download a moment to start before checking for partial files.
        time.sleep(3)
        if not _wait_for_downloads(_DOWNLOAD_DIR):
            print('Timed out waiting for the download from ' + page_url)
    finally:
        # Always close the browser; otherwise one Firefox process per URL
        # (two when a retry happens) is left running.
        driver.quit()


_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

for i in url:
    try:
        _export_holdings(i)
    except Exception as exc:
        # Same policy as before: retry once in a fresh browser, and let a
        # second failure propagate. Ctrl-C is no longer swallowed.
        print('Retrying ' + i + ' after error: ' + repr(exc))
        _export_holdings(i)
This is the pop-up window. I was not able to suppress it: a new browser is launched for every URL, and the file is not downloaded until I click "OK".