I'm a beginner with Python and am trying to learn through a BeautifulSoup web-scraping project.
I'm looking to scrape each record's item title, item URL, and purchase date from this URL and export them to a CSV file.
I've made great progress scraping the title and URL, but I just cannot figure out how to correctly extract the purchase date in my for loop (the purchase_date variable below).
What's currently happening is that the purchase date column in the CSV file (i.e., the p_date column) shows only blank cells with no text: there is no error message, but no data is written to the CSV. Any guidance is much appreciated.
Thank you!!
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from requests import get
# Scrape record descriptions, item links and purchase dates from one popsike
# search-results page and export them to popaaron.csv.

headers = {"Accept-Language": "en-US, en;q=0.5"}
url = "https://www.popsike.com/php/quicksearch.php?searchtext=metal+-signed+-promo+-beatles+-zeppelin+-acetate+-test+-sinatra&sortord=aprice&pagenum=1&incldescr=1&sprice=100&eprice=&endfrom=2020&endthru=2020&bidsfrom=&bidsthru=&layout=&flabel=&fcatno="

# The timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of quietly parsing
# an error page into an empty CSV.
results = requests.get(url, headers=headers, timeout=30)
results.raise_for_status()
soup = BeautifulSoup(results.text, "html.parser")

# Parallel lists: index i of each list describes the same record.
title = []
date = []
URL = []

record_div = soup.find_all('div', class_='col-md-7 add-desc-box')
for container in record_div:
    # The first anchor in the container supplies both the description text
    # and the item link, so look it up only once.
    link = container.find('a')
    if link is None:
        # No anchor means no title and no link; skip the record so the three
        # lists stay the same length.
        continue

    title.append(link.text)
    URL.append(link.get('href'))

    # get_text(" ", strip=True) trims each text fragment inside the span and
    # joins them with single spaces. Plain `.text` keeps the raw leading
    # whitespace/newlines, which can make a CSV cell look empty in a
    # spreadsheet.
    # NOTE(review): presumably that is the cause of the blank p_date cells;
    # confirm against the page HTML that the date really lives in this span.
    info_row = container.find('span', class_='info-row')
    purchase_date = info_row.get_text(" ", strip=True) if info_row is not None else ""
    date.append(purchase_date)

test_data = pd.DataFrame({
    'record_description': title,
    'link': URL,
    'p_date': date,
})

# Resolve relative hrefs (e.g. "../...") against the page URL. Unlike
# str.replace('../', ...), this cannot be misread as a regex pattern, leaves
# already-absolute links untouched, and works on an empty result set.
test_data['link'] = test_data['link'].map(
    lambda href: urljoin(url, href) if href else href
)

print(test_data)
test_data.to_csv('popaaron.csv')