I am trying to run an excel file with about 800 cells through my program. The program works for a file with about 15 cells perfectly. The error I am getting is
Traceback (most recent call last):
File "/Users/first_lastname/Documents/CSVScraping.py", line 16, in <module>
header = next(reader) #converts each row to a list
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xca in position 5214: invalid continuation byte
My code is the following:
import csv
from textblob import TextBlob
import string
z = 10
poscounter = 0
negcounter = 0
neucounter = 0
totalsentences = 0
outfile = open('/Users/first_lastname/Desktop/WebScraping/TripAdvisor/Juneau/analyzedData.csv', 'w')
with open('/Users/first_lastname/Desktop/WebScraping/TripAdvisor/Juneau/tripadvisor_2021.csv', 'r') as infile:
reader = csv.reader(infile)
header = next(reader) #converts each row to a list
#[rows][columns]
for row in reader:
trip_review = row[2]
#print(trip_review)
y = trip_review.lower()
y = y.translate(str.maketrans('', '', string.punctuation))
y1 = TextBlob(y)
trip_sentiment = y1.sentiment.polarity
xplitIt = y.split(" ")
for something in xplitIt:
if something == "crowded" or something == "busy" or something == "crowds" or something == "hate" or something == "hated":
tripSentiment = -0.1
break
if trip_sentiment ==0:
neucounter+=1
elif trip_sentiment >0 and trip_sentiment <=1:
poscounter+=1
elif trip_sentiment == -0.1 or trip_sentiment < 0:
negcounter+=1
totalsentences = totalsentences + 1
line = "{};{}\n".format(trip_review, trip_sentiment)
outfile.write(line)
outfile.close()
print(f"Positive Sentiment: {(poscounter/totalsentences) * 100} %")
print(f"Negative Sentiment: {(negcounter/totalsentences) * 100} %")
print(f"Neutral Sentiment: {(neucounter/totalsentences) * 100} %")
#print(totalsentences)
Any suggestions on how to fix this would be appreciated!!!!