Locating the button by the class 'btn--alt' does not work once you are on the second page, because the 'Next' and 'Previous' buttons share that class name: the lookup matched the 'Previous' button, clicked it, and sent me back to the first page.
The code change below worked perfectly for me:
# Select the input whose value attribute is "Next" (instead of matching on a CSS class), then click it.
nextButton = driver.find_element_by_xpath('//input[@value="Next"]')
nextButton.click()
Full function:
def duckduckGoSearch(query, searchPages=None, filterTheSearch=False, searchFilter=None):
    """Search DuckDuckGo's HTML front end and collect result links.

    A Chrome session is opened, ``query`` is typed into the search box and
    submitted, and each result page is parsed before the "Next" button is
    clicked, so the first page of results is included.

    Args:
        query: Text to type into the search box.
        searchPages: Maximum number of result pages to parse. ``None`` means
            keep going until no "Next" button can be clicked; ``0`` parses
            nothing.
        filterTheSearch: When true, keep only results whose URL contains
            ``searchFilter``.
        searchFilter: Substring a result URL must contain when
            ``filterTheSearch`` is true. If it is ``None`` no filtering is
            applied.

    Returns:
        A dict mapping each result URL to the text of its link. When the same
        URL shows up more than once, the first text seen is kept.
    """
    # Function-scope import: Selenium is already required by this function
    # (webdriver / Keys), this only brings its base exception into scope.
    from selenium.common.exceptions import WebDriverException

    searchResults = {}
    # Filtering is only meaningful when a filter string was actually given.
    url_filter = searchFilter if filterTheSearch else None

    driver = webdriver.Chrome()
    try:
        driver.get('https://duckduckgo.com/html?')

        # Enter the query and press RETURN to run the search.
        search_box = driver.find_element_by_xpath('//*[@id="search_form_input_homepage"]')
        search_box.send_keys(query)
        search_box.send_keys(Keys.RETURN)
        time.sleep(2)

        page_number = 1
        while searchPages is None or page_number <= searchPages:
            # Parse the page currently shown *before* navigating away from it.
            _collectDuckduckGoResults(driver.page_source, searchResults, url_filter)

            if searchPages is not None and page_number == searchPages:
                # Requested number of pages parsed; no need to load another.
                break

            try:
                # Match on the value attribute: 'Next' and 'Previous' share
                # the same CSS class, so a class lookup can hit 'Previous'.
                driver.find_element_by_xpath('//input[@value="Next"]').click()
            except WebDriverException:
                print('no more pages')
                break
            page_number += 1
    finally:
        # Always close the browser, including when an error propagates.
        driver.quit()

    print('search is done , found ', len(searchResults), 'Results')
    return searchResults


def _collectDuckduckGoResults(pageSource, searchResults, urlFilter=None):
    """Parse one result page and add its links to ``searchResults`` in place.

    Args:
        pageSource: HTML of the result page.
        searchResults: Dict of URL -> link text that is updated in place.
        urlFilter: When not ``None``, only URLs containing this substring
            are stored.
    """
    soup = BeautifulSoup(pageSource, "html.parser")
    candidate_tags = soup.findAll('h2') + soup.findAll(
        'div', {'class': 'result__body links_main links_deep'})

    for tag in candidate_tags:
        try:
            first_link = tag.findAll('a')[0]
            resultDescription = first_link.text
            resultURL = first_link['href']
        except (IndexError, KeyError):
            # No anchor, or an anchor without href: skip this tag rather
            # than reuse values left over from a previous iteration.
            print('nothing to parse')
            continue

        if resultURL in searchResults:
            continue
        if urlFilter is None or urlFilter in resultURL:
            searchResults[resultURL] = resultDescription