below is my code in this i have around 9000 records which should print in excel but as they are duplicate values so i am getting around 3000 records (i want all of the records duplicate also) pls help me new to coding
import requests
from collections import defaultdict
from bs4 import BeautifulSoup as bs
end_number = 800
current_page = 1
pdf_links = {}
path = r"C:\Users\deepak jain\Desktop\BID"
with requests.Session() as s:
while True:
r = s.get(f'https://bidplus.gem.gov.in/bidlists?bidlists&page_no={current_page}')
soup = bs(r.content, 'lxml')
for i in soup.select('.bid_no > a'):
pdf_links[i.text.strip().replace('/', '_')] = 'https://bidplus.gem.gov.in' + i['href']
# print(pdf_links)
if current_page == 1:
num_pages = int(soup.select_one('.pagination li:last-of-type > a')['data-ci-pagination-page'])
print(num_pages)
if current_page == num_pages or current_page > end_number:
break
current_page += 1
result = [key for key, values in pdf_links.items()
if len(values) > 1]
print("duplicate values", str(result))
for k, v in pdf_links.items():
with open(f'{path}/{k}.pdf', 'wb') as f:
r = s.get(v)
f.write(r.content)