# Print the annual-report download link(s) next to each wanted year from the
# BSE annual-reports page for scrip 500104.
import time

import pip._internal.distributions  # NOTE(review): looks like an accidental IDE auto-import; unused below
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Years whose report links should be printed.
n = ['2020', '2019', '2018']
base = 'https://www.bseindia.com'

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# calling convention; newer Selenium releases expect a Service object instead
# -- confirm against the installed version.
browser = webdriver.Chrome('/home/anuj/PycharmProjects/firstfrog/chromedriver')
wait = WebDriverWait(browser, 10)
try:
    browser.get('https://www.bseindia.com/stock-share-price/financials/annualreports/500104/')

    # Block (up to the 10 s configured on `wait`) until the table cells exist
    # instead of querying the DOM immediately after navigation.
    alert_name = wait.until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//*[@class="ng-scope"]/td')
        )
    )
    print(alert_name)

    for value in alert_name:
        year = value.text
        if year not in n:
            continue
        # Search relative to the matched year cell so each year is paired with
        # the link(s) from its own row. Presumably the download cell is a later
        # sibling <td> of the year cell -- verify against the live markup.
        url_d = value.find_elements(By.XPATH, './following-sibling::td//a')
        print([link.get_attribute('href') for link in url_d])
        print(year)
finally:
    # Always end the session so the browser and chromedriver do not linger.
    browser.quit()
Asked
Active
Viewed 74 times
1

Dev
- 2,739
- 2
- 21
- 34

Anuj Dwivedi
- 7
- 7
-
Welcome to SO. Please read [mcve] and edit your post accordingly. – Dev Nov 02 '20 at 09:07
1 Answers
0
Try this:
# Build a DataFrame of report years plus their download URLs from the BSE
# annual-reports page for scrip 500104, then print it.
from io import StringIO
import time  # kept from the original snippet; the explicit wait below replaces time.sleep

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

url = 'https://www.bseindia.com/stock-share-price/financials/annualreports/500104/'
driver = webdriver.Chrome()
try:
    driver.get(url)

    # Wait until the link cells are present rather than sleeping a fixed time;
    # raises TimeoutException if they never appear within 10 seconds.
    td_tags = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'tdcolumn'))
    )

    url_lst = []
    for td in td_tags:
        try:
            link = td.find_element(By.XPATH, './/a')
        except NoSuchElementException:
            # Cells without an anchor are skipped, as in the original snippet;
            # any other error now propagates instead of being swallowed.
            continue
        url_lst.append(link.get_attribute('href'))

    # The last table on the page is the one used for the report listing.
    # StringIO avoids the deprecated "literal HTML string" input to read_html.
    df = pd.read_html(StringIO(driver.page_source))[-1]
finally:
    # quit() ends the whole WebDriver session even when scraping fails.
    driver.quit()

if len(url_lst) != len(df):
    # Fail with a readable message rather than pandas' generic length error.
    raise ValueError(
        f'Found {len(url_lst)} download links for {len(df)} table rows'
    )
df['Download'] = url_lst
print(df)
Output:
Year Download
0 2020 https://www.bseindia.com/bseplus/AnnualReport/...
1 2019 https://www.bseindia.com/bseplus/AnnualReport/...
2 2018 https://www.bseindia.com/bseplus/AnnualReport/...
3 2017 https://www.bseindia.com/bseplus/AnnualReport/...
4 2016 https://www.bseindia.com/bseplus/AnnualReport/...
5 2015 https://www.bseindia.com/bseplus/AnnualReport/...
6 2014 https://www.bseindia.com/bseplus/AnnualReport/...
7 2013 https://www.bseindia.com/bseplus/AnnualReport/...
8 2012 https://www.bseindia.com/bseplus/AnnualReport/...
9 2011 https://www.bseindia.com/bseplus/AnnualReport/...
10 2010 https://www.bseindia.com/bseplus/AnnualReport/...

Sushil
- 5,440
- 1
- 8
- 26
-
-
In a single URL? What do you mean by that? Do you want it as a list or something? – Sushil Nov 02 '20 at 11:23
-
Can I also print the year along with the URL (year, URL, year, URL, in this order)? – Anuj Dwivedi Nov 11 '20 at 09:06
-
-
1
-
from selenium import webdriver import time url = 'https://www.bseindia.com/stock-share-price/financials/annualreports/500104/' driver = webdriver.Chrome() driver.get(url) time.sleep(2) td_tags = driver.find_elements_by_class_name('tdcolumn') for td in td_tags: try: print(td.find_element_by_xpath('.//a').get_attribute('href')) year = driver.find_element_by_xpath("//tr[@class='ng-scope']") print(year.text) except: pass driver.close() – Anuj Dwivedi Nov 11 '20 at 11:11
-
Getting output like this , year is not coming in decreasing order " https://www.bseindia.com/bseplus/AnnualReport/500002/5000021219.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021218.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021217.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021216.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021215.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021214.pdf 2019 " – Anuj Dwivedi Nov 11 '20 at 11:15
-
-
-
Please help me, @Sushil: I am trying to get the year along with the URL, in this format: "bseindia.com/bseplus/AnnualReport/500002/5000021219.pdf 2019 bseindia.com/bseplus/AnnualReport/500002/5000021218.pdf 2018" – Anuj Dwivedi Nov 13 '20 at 16:30
-
It is not generating the whole PDF link. When I click on it, it shows an error: "The Page you are looking for has been moved" (BSEINDIA). – Anuj Dwivedi Nov 16 '20 at 11:44
-
That is how a pandas DataFrame displays data. It does not display the full URL because it is very long. You can export this DataFrame to a CSV file, from which you can copy and paste the link. – Sushil Nov 16 '20 at 12:20
-
I am trying to export the data to MongoDB: I will take the input from MongoDB and save the results back into MongoDB. If there is any other way to do this, please let me know. – Anuj Dwivedi Nov 16 '20 at 16:42
-
This thread could help you: https://stackoverflow.com/questions/20167194/insert-a-pandas-dataframe-into-mongodb-using-pymongo – Sushil Nov 17 '20 at 02:46