1

My code is in Python 3, and I have used it before to live-stream tweets in English. However, when I use the same code to search for an Arabic query, it returns all tweets as symbols and random characters. Here is a screenshot and the code. (PS: I am a beginner in coding.) Thank you! Here is my code: enter image description here

import twitter,json,csv

# Twitter API credentials. The values below are placeholders; substitute
# your own application's keys and tokens before running.
CONSUMER_KEY = '<consumer key>'
CONSUMER_SECRET = '<consumer secret>'
OAUTH_TOKEN = '<oauth token>'
OAUTH_TOKEN_SECRET = '<oauth token secret>'

# Build the OAuth credentials object: user token pair first, then the
# application (consumer) pair.
auth = twitter.oauth.OAuth(
    OAUTH_TOKEN,
    OAUTH_TOKEN_SECRET,
    CONSUMER_KEY,
    CONSUMER_SECRET,
)

# API client; its `auth` attribute is reused below for the streaming client.
twitter_api = twitter.Twitter(auth=auth)

# Set up the output file.
# encoding='utf-8' is given explicitly so non-ASCII text (e.g. Arabic) is not
# written in the platform default encoding; newline='' is what the csv module
# requires so that it controls line endings itself (avoids blank rows on
# Windows).
# NOTE(review): if Excel still shows garbled text when opening the file
# directly, 'utf-8-sig' (UTF-8 with a BOM) may be needed instead - confirm.
csvfile = open('tweets_extended.csv', 'w', encoding='utf-8', newline='')
csvwriter = csv.writer(csvfile, delimiter='|')

#  Here's a function that takes out characters that can break
#  our import into Excel and replaces them with spaces.
def getVal(val):
    """Return *val* as text that is safe for one '|'-delimited CSV cell.

    The field delimiter ('|') and line breaks ('\\n', '\\r') are each
    replaced by a single space.

    The result is always a ``str``. It is deliberately NOT encoded to
    bytes: on Python 3 ``csv.writer`` expects text, and handing it
    ``bytes`` makes it write the bytes repr (``b'\\xd8...'``) instead of
    the characters. Encoding is the job of the output file.

    Args:
        val: The text to clean; may be ``None`` or empty.

    Returns:
        The cleaned string, or ``""`` when *val* is falsy.
    """
    if not val:
        return ""
    return val.replace('|', ' ').replace('\n', ' ').replace('\r', ' ')


 q = "سلمان" # Comma-separated list of terms can go here
 print ('Filtering the public timeline for track="%s"' % (q,))

 twitter_stream = twitter.TwitterStream(auth=twitter_api.auth)

 stream = twitter_stream.statuses.filter(track=q)

for tweet in stream:
    try:
        if tweet['truncated']:
            tweet_text = tweet['extended_tweet']['full_text']
        else:
            tweet_text = tweet['text']
        # write the values to file
        csvwriter.writerow([
            tweet['created_at'],
            getVal(tweet['user']['screen_name']),
            getVal(tweet_text),
            getVal(tweet['user']['location']),
            tweet['user']['statuses_count'],
            tweet['user']['followers_count'],
            tweet['user']['lang'],
            tweet['user']['id'],
            ])
        # print something to the screen, mostly so we can see what is going on...
        print (tweet['user']['screen_name'].encode('utf-8'), tweet['text'].encode('utf-8'))
    except Exception as err:
        print (err)
        pass
Joe Mayo
  • 7,501
  • 7
  • 41
  • 60

0 Answers0