Unicode Dedode Error in python3.5

Question

I have compiled the following code to create a word cloud of terms used in tweets containing the term "happy".

import tweepy
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
from time import sleep
import json
import os
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
from scipy.misc import imread
import time
import pandas as pd

consumer_key =''
consumer_secret =''
access_token =''
access_token_secret =''

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

searchTerm ='happy'
MAX_TWEETS = 10

search_data=[ ]
current_working_dir = os.path.dirname(os.path.realpath(__file__))
current_working_dir = "./"
log_tweets = current_working_dir  + str(time.time()) + '_searchtweets.txt'
with open(log_tweets, 'w') as outfile:

    for tweet in tweepy.Cursor(api.search,q=searchTerm).items(MAX_TWEETS):

        search_data.append(json.loads(json.dumps(tweet._json)))
        outfile.write(json.dumps(tweet._json))
        outfile.write("\n")


tweets = pd.DataFrame()
tweets['created_at'] = list(map(lambda tweet: time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(tweet['created_at'],'%a %b %d %H:%M:%S +0000 %Y')), search_data))
tweets['user'] = list(map(lambda tweet: tweet['user']['screen_name'], search_data))
tweets['text'] = list(map(lambda tweet: tweet['text'].encode('utf-8'), search_data))
tweets['lang'] = list(map(lambda tweet: tweet['lang'], search_data))
tweets['Location'] = list(map(lambda tweet: tweet['place']['country'] if tweet['place'] != None else None, search_data))
tweets['retweet_count'] = list(map(lambda tweet: tweet['retweet_count'], search_data))
tweets['favorite_count'] = list(map(lambda tweet: tweet['favorite_count'], search_data))

tweets['long'] = list(map(lambda tweet: tweet['coordinates']['coordinates'][0] if tweet['coordinates'] != None else 'NaN', search_data))

tweets['latt'] = list(map(lambda tweet: tweet['coordinates']['coordinates'][1] if tweet['coordinates'] != None else 'NaN', search_data))


words = " ".join(tweets['text'].values.astype(str))

no_urls_no_tags = " ".join([word for word in words.split()
                            if 'http' not in word
                            and not word.startswith('@')
                            and word != 'RT'
                            ])

search_mask = imread('images/twitter_mask.png', flatten=True )

wc = WordCloud(background_color="white", 
               font_path="/Library/Fonts/Verdana.ttf", 
               stopwords=STOPWORDS, 
               width=1800, 
               height=140, 
               mask=search_mask)
wc.generate(no_urls_no_tags)
plt.imshow(wc)
plt.axis("off")
plt.savefig('search_term_wordcloud_print.png', dpi =300)
plt.show()

When running, i get the following error message,

words = " ".join(tweets['text'].values.astype(str))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 139: ordinal not in range(128)

is there a way, or lines of code to change to stop this error from occurring?

You have to specify encoding. Please check this question http://stackoverflow.com/questions/9644099/python-ascii-codec-cant-decode-byte — taras, Feb 19 '17 at 17:32
Why `.encode('utf8')` the tweet text? Process the strings as Unicode and you won't have to convert it back to a `str` to join it. — Mark Tolonen, Feb 19 '17 at 19:55

Unicode Dedode Error in python3.5

0 Answers0