I have a Python script that extracts data from a website and writes it to a CSV file. The code works fine, but now I would like to iterate over a list of web pages with the same structure to collect more data.
My code is:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import datetime
# page = requests.get('https://www.pccomponentes.com/procesadores/amd/socket-am4')
# page = requests.get('https://www.pccomponentes.com/placas-base/amd-x570/atx')
url_list = [  # each entry needs a trailing comma, or Python concatenates adjacent string literals into one URL
    'https://www.pccomponentes.com/procesadores/socket-am4',
    'https://www.pccomponentes.com/discos-duros/500-gb/conexiones-m-2/disco-ssd/internos',
    'https://www.pccomponentes.com/discos-duros/1-tb/conexiones-m-2/disco-ssd/internos',
    'https://www.pccomponentes.com/placas-base/amd-b550/atx',
    'https://www.pccomponentes.com/placas-base/amd-x570/atx',
    'https://www.pccomponentes.com/memorias-ram/16-gb/kit-2x8gb',
    'https://www.pccomponentes.com/ventiladores-cpu',
    'https://www.pccomponentes.com/fuentes-alimentacion/850w/fuente-modular',
    'https://www.pccomponentes.com/fuentes-alimentacion/750w/fuente-modular',
    'https://www.pccomponentes.com/cajas-pc/atx/con-ventana/sin-ventana',
]
for link in url_list:
    r = requests.get(link)
    # r.encoding = 'utf-8'
    # html_content = r.text
    # soup = BeautifulSoup(html_content, 'lxml')
    # table = soup.find('table', class_='bigborder')
    soup = BeautifulSoup(r.content, 'html.parser')  # parse the page fetched for this link
    #print(soup)
    product = soup.find(id='articleListContent')
    #print(product)
    items = product.find_all(class_='c-product-card__content')
    #print(items[0])
    # print(items[0].find(class_='c-product-card__header').get_text())
    # print(items[0].find(class_='c-product-card__prices cy-product-price').get_text())
    # print(items[0].find(class_='c-product-card__availability disponibilidad-inmediata cy-product-availability-date').get_text())
    # print(items[0].find(class_='c-star-rating__text cy-product-text').get_text())
    # print(items[0].find(class_='c-star-rating__text cy-product-rating-result').get_text())
    product_name = [item.find(class_='c-product-card__header').get_text() for item in items]
    price = [item.find(class_='c-product-card__prices cy-product-price').get_text() for item in items]
    # availability = [item.find(class_='c-product-card__availability disponibilidad-inmediata cy-product-availability-date').get_text() for item in items]
    rating = [item.find(class_='c-star-rating__text cy-product-text').get_text() for item in items]
    opinion = [item.find(class_='c-star-rating__text cy-product-rating-result').get_text() for item in items]
    # print(product_name)
    # print(price)
    # print(availability)
    # print(rating)
    # print(opinion)
    store = 'PCComponentes'
    extraction_date = datetime.datetime.now()
    data_PCCOMP = pd.DataFrame({
        'product_name': product_name,
        'price': price,
        # 'availability': availability,
        'rating': rating,
        'opinion': opinion,
        'store': store,
        'date_extraction': extraction_date,
    })
    # site = 'mysite'
    path = "C:\\PriceTracking\\pccomp\\"  # backslashes must be escaped in a normal string literal
    # now = datetime.datetime.now()
    mydate = extraction_date.strftime('%Y%m%d')
    mytime = extraction_date.strftime('%H%M%S')
    filename = path + store + '_' + mydate + '_' + mytime + ".csv"
    data_PCCOMP.to_csv(filename)
    #print(data_PCCOMP)
At the moment each pass through the loop writes its own file, so I end up with one CSV per URL. How can I iterate so that the data from all the URLs is collected into the same CSV?
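My current idea is to build one DataFrame per page, collect them in a list, and concatenate everything once after the loop. Here is an untested sketch of what I mean (frames and data_all are just names I made up; the parsing is the same as above):

import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup

frames = []  # one DataFrame per page, concatenated after the loop
for link in url_list:  # url_list as defined above
    r = requests.get(link)
    soup = BeautifulSoup(r.content, 'html.parser')
    product = soup.find(id='articleListContent')
    items = product.find_all(class_='c-product-card__content')
    frames.append(pd.DataFrame({
        'product_name': [i.find(class_='c-product-card__header').get_text() for i in items],
        'price': [i.find(class_='c-product-card__prices cy-product-price').get_text() for i in items],
        'rating': [i.find(class_='c-star-rating__text cy-product-text').get_text() for i in items],
        'opinion': [i.find(class_='c-star-rating__text cy-product-rating-result').get_text() for i in items],
        'store': 'PCComponentes',
        'date_extraction': datetime.datetime.now(),
    }))

# stack the rows of every page into a single table and write one CSV
data_all = pd.concat(frames, ignore_index=True)
now = datetime.datetime.now()
data_all.to_csv("C:\\PriceTracking\\pccomp\\PCComponentes_" + now.strftime('%Y%m%d') + '_' + now.strftime('%H%M%S') + ".csv")

Is something like this the right approach, or is there a more idiomatic way to do it?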
Any help would be much appreciated.