I've written some script in python using selenium to scrape name and price of different products from redmart website. My scraper clicks on a link, goes to its target page, parses data from there. However, the issue I'm facing with this crawler is it scrapes very few items from a page because of the webpage's slow-loading method. How can I get all the data from each page controlling the lazy-loading process? I tried with "execute script" method but i did it wrongly. Here is the script I'm trying with:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://redmart.com/bakery")
wait = WebDriverWait(driver, 10)
counter = 0
while True:
try:
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "li.image-facets-pill")))
driver.find_elements_by_css_selector('img.image-facets-pill-image')[counter].click()
counter += 1
except IndexError:
break
# driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
for elems in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.productPreview"))):
name = elems.find_element_by_css_selector('h4[title] a').text
price = elems.find_element_by_css_selector('span[class^="ProductPrice__"]').text
print(name, price)
driver.back()
driver.quit()