I am trying to find a way to use the isalnum() method to remove special characters from the output of my script.
# Fetch the Amazon electronics best-seller page and export one CSV row per
# listed item to amazon_tech.csv.
url = "https://www.amazon.com/gp/bestsellers/electronics/ref=zg-bs_electronics_dw_sml"
# NOTE(review): `headers` is not defined in this snippet; it must already be
# in scope (presumably a dict of HTTP request headers).
webpage = requests.get(url, headers=headers)
soup = BeautifulSoup(webpage.content, 'html.parser')
items = soup.find_all(id="gridItemRoot")

csv_headers = ['Index', 'Title', 'Rating', 'Review Count', 'Price']
# Characters that should eventually be stripped from the text fields.
# Defined once here instead of on every loop iteration; this version of the
# script does not apply it to the rows yet.
removechar = ['â', '€', '“']

# Open the file a single time: 'w' truncates any previous export, and the
# header plus every data row go through the same writer. Re-opening the file
# in append mode for each row produced the same content with needless I/O.
with open('amazon_tech.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(csv_headers)
    for item in items:
        # [1:] drops the first character of the badge text
        # (presumably a leading '#' -- confirm against the page markup).
        index = item.find('span', class_='zg-bdg-text').text[1:]
        children = item.find('div', class_='zg-grid-general-faceout').div
        title = children.contents[1].text
        # The rating and the review count are cut out of the same text node
        # by fixed character positions, so they depend on the exact wording
        # of that node.
        rating = children.contents[2].text[:4]
        reviewcount = children.contents[2].text[19:]
        price = children.contents[-1].text
        writer.writerow([index, title, rating, reviewcount, price])
I want to remove the special characters in the "removechar" list from the title, rating, reviewcount, and price strings before they are written to the CSV file.
How can I achieve this?
I tried changing the loop at the bottom of the script like this, but I got stuck.
# Characters to strip from every text field before it is written out.
removechar = ['â', '€', '“']
# str.isalnum() only reports whether an entire string is alphanumeric; it
# cannot remove anything, and `item` is a parsed page element rather than a
# string. A translation table that maps each unwanted character to None
# deletes all of them in a single pass over each field.
strip_table = str.maketrans('', '', ''.join(removechar))

# Append mode: the header row is presumably already in the file, written by
# the earlier part of the script. The file is opened once for all rows.
with open('amazon_tech.csv', 'a', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for item in items:
        # [1:] drops the first character of the badge text
        # (presumably a leading '#' -- confirm against the page markup).
        index = item.find('span', class_='zg-bdg-text').text[1:]
        children = item.find('div', class_='zg-grid-general-faceout').div
        title = children.contents[1].text
        rating = children.contents[2].text[:4]
        reviewcount = children.contents[2].text[19:]
        price = children.contents[-1].text
        # Only the four scraped text fields are cleaned; the index is
        # written as extracted.
        text_fields = (title, rating, reviewcount, price)
        cleaned = [field.translate(strip_table) for field in text_fields]
        writer.writerow([index] + cleaned)
I am completely lost on how to finish it.