1
"""Print annual-report download links for selected years from a BSE page.

Opens the annual-reports page for scrip 500104 in Chrome, waits for the
report table rows to be rendered, and prints ``<year> <href>`` for every
link found in a row whose year cell matches one of the years in ``n``.
"""
import time

from bs4 import BeautifulSoup
from bs4.element import Tag
import pip._internal.distributions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Years whose report links should be printed; compared against cell text.
n = ['2020', '2019', '2018']
base = 'https://www.bseindia.com'

# NOTE(review): passing the driver path positionally is the Selenium 3
# calling convention; newer Selenium releases expect a Service object
# instead -- confirm against the installed version.
browser = webdriver.Chrome('/home/anuj/PycharmProjects/firstfrog/chromedriver')
try:
    wait = WebDriverWait(browser, 10)
    browser.get('https://www.bseindia.com/stock-share-price/financials/annualreports/500104/')

    # Wait (up to 10 s) until at least one "ng-scope" element that directly
    # contains <td> cells exists, instead of querying the DOM immediately
    # after get() returns.
    rows = wait.until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//*[@class="ng-scope"][td]')
        )
    )

    for row in rows:
        # Look for the year among this row's own cells only.
        cells = row.find_elements(By.XPATH, './td')
        year = next((cell.text for cell in cells if cell.text in n), None)
        if year is None:
            continue

        # Relative XPath: only anchors inside the current row, so each year
        # is paired with its own link(s) rather than with every link on the
        # page. (The original 'td/td/a' path required a <td> directly inside
        # another <td>.)
        for link in row.find_elements(By.XPATH, './/a'):
            print(year, link.get_attribute('href'))
finally:
    # Always end the browser session, even if the wait times out.
    browser.quit()
Dev
  • 2,739
  • 2
  • 21
  • 34

1 Answer

0

Try this:

"""Build a DataFrame of annual-report years and their download links.

Loads the BSE annual-reports page in Chrome, collects the ``href`` of the
first link inside every ``tdcolumn`` cell that contains one, parses the last
HTML table on the page with pandas, and attaches the links as a
``Download`` column.
"""
import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

url = 'https://www.bseindia.com/stock-share-price/financials/annualreports/500104/'

driver = webdriver.Chrome()
try:
    driver.get(url)
    # Fixed pause before reading the DOM, kept from the original script.
    time.sleep(2)

    url_lst = []

    td_tags = driver.find_elements(By.CLASS_NAME, 'tdcolumn')

    for td in td_tags:
        # find_elements returns an empty list when the cell has no link, so
        # link-less cells are skipped without a blanket try/except that
        # would also hide unrelated errors.
        anchors = td.find_elements(By.XPATH, './/a')
        if anchors:
            url_lst.append(anchors[0].get_attribute('href'))

    # Wrap the markup in StringIO: newer pandas versions deprecate passing a
    # literal HTML string to read_html. [-1] selects the last table found.
    df = pd.read_html(StringIO(driver.page_source))[-1]
finally:
    # Always end the browser session, even if scraping or parsing fails.
    driver.quit()

# Fail with a clear message if the links cannot be aligned with the rows.
if len(url_lst) != len(df):
    raise ValueError(
        f'Found {len(url_lst)} download links for {len(df)} table rows'
    )
df['Download'] = url_lst

# NOTE: printing a DataFrame abbreviates long strings with "..."; the full
# URLs are still stored in df['Download'] (e.g. use df.to_csv(...) or
# df.to_string() to see them in full).
print(df)

Output:

    Year                                           Download
0   2020  https://www.bseindia.com/bseplus/AnnualReport/...
1   2019  https://www.bseindia.com/bseplus/AnnualReport/...
2   2018  https://www.bseindia.com/bseplus/AnnualReport/...
3   2017  https://www.bseindia.com/bseplus/AnnualReport/...
4   2016  https://www.bseindia.com/bseplus/AnnualReport/...
5   2015  https://www.bseindia.com/bseplus/AnnualReport/...
6   2014  https://www.bseindia.com/bseplus/AnnualReport/...
7   2013  https://www.bseindia.com/bseplus/AnnualReport/...
8   2012  https://www.bseindia.com/bseplus/AnnualReport/...
9   2011  https://www.bseindia.com/bseplus/AnnualReport/...
10  2010  https://www.bseindia.com/bseplus/AnnualReport/...
Sushil
  • 5,440
  • 1
  • 8
  • 26
  • Can we combine these multiple URLs into a single URL? – Anuj Dwivedi Nov 02 '20 at 10:56
  • In a single URL? What do you mean by that? Do you want it as a list or something? – Sushil Nov 02 '20 at 11:23
  • Can I also print the year along with the URL (in the order: year, URL, year, URL)? – Anuj Dwivedi Nov 11 '20 at 09:06
  • Yes, you can do it. Should I help you with it? – Sushil Nov 11 '20 at 09:16
  • 1
    Yes need your help – Anuj Dwivedi Nov 11 '20 at 09:18
  • from selenium import webdriver import time url = 'https://www.bseindia.com/stock-share-price/financials/annualreports/500104/' driver = webdriver.Chrome() driver.get(url) time.sleep(2) td_tags = driver.find_elements_by_class_name('tdcolumn') for td in td_tags: try: print(td.find_element_by_xpath('.//a').get_attribute('href')) year = driver.find_element_by_xpath("//tr[@class='ng-scope']") print(year.text) except: pass driver.close() – Anuj Dwivedi Nov 11 '20 at 11:11
  • I am getting output like this; the year is not coming out in decreasing order: " https://www.bseindia.com/bseplus/AnnualReport/500002/5000021219.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021218.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021217.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021216.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021215.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021214.pdf 2019 " – Anuj Dwivedi Nov 11 '20 at 11:15
  • Can anybody help me? – Anuj Dwivedi Nov 13 '20 at 04:21
  • @AnujDwivedi Yes. I will help you. – Sushil Nov 13 '20 at 05:12
  • Please help me, @Sushil. I am trying to get the year along with the URL, in this format: "bseindia.com/bseplus/AnnualReport/500002/5000021219.pdf 2019 bseindia.com/bseplus/AnnualReport/500002/5000021218.pdf 2018" – Anuj Dwivedi Nov 13 '20 at 16:30
  • It is not generating the whole PDF link. When I click on it, it shows the error "The Page you are looking for has been moved BSEINDIA". – Anuj Dwivedi Nov 16 '20 at 11:44
  • That is how a pandas DataFrame displays data. It does not display the full URL because it is very long. You can export this DataFrame to a CSV file, from which you can copy and paste the link. – Sushil Nov 16 '20 at 12:20
  • I am trying to export the data to MongoDB: I will take the input from MongoDB and save the results back into MongoDB. If this is possible in some other way, please let me know. – Anuj Dwivedi Nov 16 '20 at 16:42
  • This thread could help you: https://stackoverflow.com/questions/20167194/insert-a-pandas-dataframe-into-mongodb-using-pymongo – Sushil Nov 17 '20 at 02:46