I am trying to scrape "the north face" website for a group project and I am looking for a faster way to get the output faster. Is there any faster way without opening a chrome web page every time I am getting the html of a page ? I can't use requests cause it doesn't give me the FULL source code. Thank for the help. This is what I have:
import requests
from bs4 import BeautifulSoup
from helium import *
import time
# To tell the API that I am a user using Google Chrome.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"}
# open Chrome in the back ground.
browser = start_chrome("https://www.thenorthface.com/shop/mens-jackets-vests-en-ca#facet=&beginIndex=0", headless=True)
# Click on the "LOAD MORE" button to load all the products in the page.
while Text("LOAD MORE").exists():
click("LOAD MORE")
time.sleep(2.0)
# get the html source of the page
html = browser.page_source
kill_browser()
# creat a soup object
soup = BeautifulSoup(html, "html.parser")
# print(soup.prettify())
# soup object for all products
products_cards = soup.find_all("div", {"class": "product-block-info info info-js"})
# print(products_cards)
products_names = []
products_links = []
products_prices = []
for card in products_cards:
for name in card.find_all("div", {"class": "product-block-name name name-js"}):
for i in name.find_all("a", class_="product-block-name-link"):
# print(i.get("title"))
products_names.append(i.get("title"))
# print(i.get("href"))
products_links.append(i.get("href"))
# soup object for specific product
# product_soup = BeautifulSoup(html, "html.parser")
#!!!!!!!!!!!!!!!!
for jacket_url in products_links[:3]:
browser = start_chrome(jacket_url, headless=True)
html = browser.page_source
kill_browser()
product_soup = BeautifulSoup(html, "html.parser")
price_info = product_soup.find_all("div", class_="product-content-info-price product-price product-price-js")
for info in price_info:
for price in info.find("span", "product-content-info-offer-price offer-price offer-price-js product-price-amount-js"):
products_prices.append(price)
print(len(products_prices))
print(len(products_names))
print(len(products_links)) ```