0

I am scraping data from a website and I want to store it in a structured format such as JSON, Excel, SQLite, or plain text, so that the data is organised and easy to read. Please help me.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Scrape the title of one Amazon product and then walk through every page of
# its reviews, printing each reviewer name together with the review text.
driver = webdriver.Chrome()
try:
    driver.get('https://www.amazon.in/Skybags-Brat-Black-Casual-Backpack/dp/B08Z1HHHTD/ref=sr_1_2?dchild=1&keywords=skybags&qid=1627786382&sr=8-2')

    product_title = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "productTitle"))).text

    print(product_title)

    # Leave the product page and open the full review listing.
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@data-hook='see-all-reviews-link-foot']"))).click()

    while True:
        for item in WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[data-hook='review']"))):
            # find_element(By..., ...) replaces the find_element_by_* helpers,
            # which were removed in Selenium 4.3.
            reviewer = item.find_element(By.CSS_SELECTOR, "span.a-profile-name").text
            review = ' '.join(i.text.strip() for i in item.find_elements(By.XPATH, ".//span[@data-hook='review-body']"))
            print(reviewer, review)

        try:
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[@data-hook='pagination-bar']//a[contains(@href,'/product-reviews/') and contains(text(),'Next page')]"))).click()
            # Wait for the last review of the previous page to go stale so the
            # next iteration does not re-read the old page.
            WebDriverWait(driver, 10).until(EC.staleness_of(item))
        except Exception:
            # Best-effort pagination: any failure to find/click "Next page" or
            # to see the page change is treated as the end of the reviews.
            break
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()
krishan
  • 86
  • 7

1 Answer

1

Store the values product_title, review and reviewer in a dictionary and convert it to JSON using the json module.

You may store the data in this format and finally convert the list to JSON.

lst = [{"product_title": <title>, "reviews": [{"review": <review>, "reviewer": <reviewer>}, {"review": <review>, "reviewer": <reviewer>}, ...]}]
import json
json.dumps(lst)

To write the data to a JSON file:

# Write the collected records to data.json; ensure_ascii=False keeps
# non-ASCII characters as-is instead of emitting \uXXXX escapes.
with open('data.json', mode='w', encoding='utf-8') as json_file:
    json.dump(lst, fp=json_file, ensure_ascii=False)
Ram
  • 4,724
  • 2
  • 14
  • 22