I imported requests inside a function (def) and ran it, but I still got a NameError: an import inside a function only binds the name locally to that function, so other functions cannot see it. Below I import everything in one place and combine all the steps into a single function.
# Import all third-party libraries into the module's global namespace.
def import_all_modules():
    """Import the scraping dependencies so sibling functions can use them.

    A plain ``import`` inside a function binds the module to a name that is
    *local* to that function — which is exactly why the other functions hit
    ``NameError``.  The ``global`` declaration below makes each import bind
    at module level instead, so ``soup``, ``requests``, ``pd`` and ``drive``
    become visible everywhere in this file.
    """
    global soup, requests, pd, drive
    from bs4 import BeautifulSoup as soup
    import requests
    import pandas as pd
    from google.colab import drive
# Get raw HTML for a URL
def get_html_from_url(url: str, timeout: float = 30.0) -> bytes:
    """Download *url* and return the raw response body as bytes.

    Args:
        url: page to download.
        timeout: seconds to wait for the server; ``requests`` has no default
            timeout, so an unbounded request could hang forever.
    """
    # Local import so this works even if import_all_modules() was not called.
    import requests
    html = requests.get(url, timeout=timeout).content
    return html
# load the html content and pull out the product cards
def load_page_and_filter(html):
    """Parse *html* and return every product container tag.

    Args:
        html: raw HTML (bytes or str) of the listing page.

    Returns:
        List of bs4 Tag objects, one per ``<div class="good-box">``.
    """
    # "html.parser" names the stdlib parser explicitly; the original
    # feature string "html" lets bs4 pick whichever HTML parser happens
    # to be installed, which makes results environment-dependent.
    soup_page = soup(html, "html.parser")
    shoes = soup_page.find_all("div", {"class": "good-box"})
    return shoes
# Create Pandas DataFrame from the parsed product tags
def generate_detaframe_from_soup(soup):
    """Build a DataFrame of shoe names and numeric prices.

    Args:
        soup: iterable of product tags (the ``good-box`` divs); each one is
            expected to expose ``tag.a.span.text`` (name) and
            ``tag.div.p.text`` (price) — TODO confirm against the live page.

    Returns:
        pandas.DataFrame with a ``Name`` (str) and ``Price`` (float) column.
    """
    names = []
    prices = []
    for shoe in soup:
        names.append(shoe.a.span.text)
        prices.append(shoe.div.p.text)
    adidas_shoes_dict = {
        "Name": names,
        "Price": prices,
    }
    df = pd.DataFrame(data=adidas_shoes_dict)
    # Price text looks like "HK$\xa01,299": keep the part after the
    # non-breaking space and drop thousands separators — the original
    # float() call crashed on comma-separated amounts.
    df["Price"] = df["Price"].apply(
        lambda x: float(x.split("\xa0")[-1].replace(",", ""))
    )
    return df
#save CSV
def save_csv(file_name, df):
    """Mount Google Drive and write *df* to *file_name* as CSV.

    Args:
        file_name: destination path, expected to live under /content/drive.
        df: pandas DataFrame to persist.

    NOTE(review): drive.mount runs on every call — presumably idempotent in
    Colab, verify.  to_csv writes the row index as an extra column by
    default; confirm that is wanted.
    """
    drive.mount("/content/drive")
    df.to_csv(file_name)
# Run all the steps at once by chaining them in one function
def run_web_scraping(url, file_name):
    """End-to-end pipeline: fetch the page, parse it, tabulate, save to CSV.

    Args:
        url: listing page to scrape.
        file_name: CSV destination path.
    """
    import_all_modules()
    page_html = get_html_from_url(url)
    product_tags = load_page_and_filter(page_html)
    shoes_frame = generate_detaframe_from_soup(product_tags)
    save_csv(file_name, shoes_frame)
# Set the URL and file_name used by run_web_scraping
# Script entry point: scrape the Adidas HK basketball listing and save it
# to Google Drive.  The __main__ guard keeps the scrape from firing if this
# file is ever imported as a module.
if __name__ == "__main__":
    url = "https://www.adidas.com.hk/men/shoes/basketball"
    file_name = "/content/drive/MyDrive/adidas.csv"
    run_web_scraping(url, file_name)