My problem is that only the most recent URL's response is saved — each loop iteration overwrites the output file. How can I save the results from all the responses? I tried df.to_csv('complete.csv', 'a'), but that creates a jumbled file (note: the second positional argument of to_csv is sep, not mode, so this used 'a' as the column separator).
# imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
# main code
#
# Fix: the original created and wrote the DataFrame INSIDE the per-URL loop,
# so 'complete.csv' was overwritten on every iteration and only the last
# response survived. We now accumulate rows across all URLs and write the
# CSV exactly once after the loop finishes.
all_rows = []  # (linecode, partno) tuples gathered from every URL

with open('list.txt', 'r') as f_in:
    for line in map(str.strip, f_in):
        if not line:
            # skip blank lines in list.txt
            continue
        response = requests.get(line)
        soup = BeautifulSoup(response.text, 'html.parser')
        # each listing row lives in a tbody whose id starts with "listingcontainer"
        for tbody in soup.select('tbody[id^="listingcontainer"]'):
            tmp = tbody.find('span', class_='listing-final-manufacturer')
            linecode = tmp.text if tmp else '-'
            tmp = tbody.find('span', class_='listing-final-partnumber as-link-if-js buyers-guide-color')
            partno = tmp.text if tmp else '-'
            all_rows.append((linecode, partno))

# create one dataframe holding the rows from ALL responses
df = pd.DataFrame(all_rows, columns=['linecode', 'partno'])
# save to csv once, after everything has been collected
df.to_csv('complete.csv')
print(df)
list.txt
https://www.rockauto.com/en/catalog/ford,2010,f-150,6.2l+v8,1447337,brake+&+wheel+hub,brake+pad,1684
https://www.rockauto.com/en/catalog/ford,2015,f-150,5.0l+v8,3308775,brake+&+wheel+hub,brake+pad,1684