I need to collect all links from a webpage as seen below, which also has a load more news button. I wrote my script, but my script gives only the links from the first page, as if it does not click on the load more news button. I updated some of Selenium attributes. I really don't know why I could not get all the links, clicking on load_more button.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
import json
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
url = "..."
base_url = "..."
driver.get(url)
outlinks = []
wait = WebDriverWait(driver, 90)
load_more_button = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.listing-load-more-btn[title="Load More News"]')))
num_links = 0
while True:
links = driver.find_elements(By.CSS_SELECTOR, 'a.text-truncate')
num_links_new = len(links)
if num_links_new > num_links:
num_links = num_links_new
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
load_more_button = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.listing-load-more-btn[title="Load More News"]')))
if load_more_button.is_displayed():
load_more_button.click()
sleep(10)
else:
break
new_links = driver.find_elements(By.CSS_SELECTOR, 'a.text-truncate')
for link in new_links:
href = link.get_attribute('href')
full_url = base_url + href
enurl=full_url.replace("ar-ae", "en")
outlinks.append(enurl)
print(outlinks)
data = json.dumps(outlinks)
with open('outlinks.json', 'w') as f:
f.write(data)
driver.close()