-3

I have already imported requests in def and ran it. But still got a name error... I import all items at a time and sum up all function into one.

    #import all the libraries
    def import_all_modules():
      from bs4 import BeautifulSoup as soup
      import requests 
      import pandas as pd
      from google.colab import drive
    
    #Get
    def get_html_from_url(url:str):
      html=requests.get(url).content
      return html
    
    
    #load the html content
    def load_page_and_filter(html):
      soup_page=soup(html, "html")    
      shoes=soup_page.find_all("div",{"class":"good-box"})
      return shoes
    
    #Create Pandas DataFrame from HTML
    def generate_detaframe_from_soup(soup):
      names=[]
      prices=[]
      for shoe in soup:
        names.append(shoe.a.span.text)
        prices.append(shoe.div.p.text)
      
      adidas_shoes_dict={
          "Name":names,
          "Price":prices
      } 
      df= pd.DataFrame(data=adidas_shoes_dict)
      df["Price"]=df["Price"].apply(lambda x: float(x.split("\xa0")[-1]))
      return df
      
    #save CSV
    def save_csv(file_name, df):
      drive.mount("/content/drive")
      df.to_csv(file_name)

run all the method at once and put them in one function


    def run_web_scraping(url,file_name):
      import_all_modules()
      html=get_html_from_url(url)
      soup=load_page_and_filter(html)
      df=generate_detaframe_from_soup(soup)
      save_csv(file_name, df)

saved URL and file_name for run_web_scrapping


    url="https://www.adidas.com.hk/men/shoes/basketball"
    file_name="/content/drive/MyDrive/adidas.csv"
    run_web_scraping(url,file_name)
yu mak
  • 1
  • 2

1 Answers1

1

Imports are binded into the current scope, so if you do your imports in a function, they will not be available once you exit the function.

Just put them at the top of the file, not in a function, and it should work great.

Lenormju
  • 4,078
  • 2
  • 8
  • 22