I got this code to work to scrape a table on a webpage, which I'm very happy with. However, on a rare occasion, a title might miss a 'genre' or an 'image URL' field. As soon as the scraper hits an item in the list that has a missing value it discontinues and gives me the 'NoneType' object has no attribute 'text'
error.
How can I amend this code for it to continue scraping and just pass a N/A value for that specific column if a value is missing.
Your help is much appreciated!
from bs4 import BeautifulSoup
import pandas as pd
# Send a GET request to the URL
url = "https://www.hebban.nl/rank"
response = requests.get(url,headers={'user-agent':'Mozilla/5.0'})
# Parse the HTML content using Beautiful Soup
soup = BeautifulSoup(response.content, 'html.parser')
# Find the book titles, authors, and image url links
data = []
books = soup.find_all('div', class_='item')
for book in books:
rank = book.h3.text.strip()
title = book.find('a', class_='neutral').text.strip()
author = book.find('span', class_='author').text.strip()
genre = book.find('a', class_='btn btn4 yf-genre').text.strip()
##img_url = book.img.get('data-src')
print(rank + ' by ' + author)
##print('Image URL: ' + img_url)
data.append({'rank': rank, 'author': author, 'title': title, 'genres': genre})
# Create a dataframe and save it to a csv
df = pd.DataFrame (data)
df.to_csv('hebbanexport.csv', index=False)