I am trying to collect President Trump's tweets on immigration and run sentiment analysis on them. My code is:
import pprint
import datetime
# startDate = datetime.datetime(2019, 4, 20, 0, 0, 0)
# endDate = datetime.datetime(2020, 4, 29, 0, 0, 0)
# Download the account's timeline with Tweepy and load it into a DataFrame.
# Expects `api` (an authenticated tweepy.API), `tweepy`, `pd` and `np` to be
# defined/imported earlier in the file.
username = "realDonaldTrump"
# NOTE(review): `page` and `stop_loop` are not used anywhere in the visible
# snippet; kept in case later code reads them.
page = 1
stop_loop = False

# `screen_name` is the explicit keyword for a handle; the legacy `id` keyword
# is not accepted by newer Tweepy releases (see the Tweepy API reference).
curs = tweepy.Cursor(api.user_timeline, screen_name=username)
# Cursor.items() pages through the timeline lazily; materialize it once.
finalList1 = list(curs.items())
print(len(finalList1))

data = pd.DataFrame(data=[tweet.text for tweet in finalList1], columns=['Tweets'])
# Add relevant per-tweet metadata as extra columns.
data['len'] = np.array([len(tweet.text) for tweet in finalList1])
data['ID'] = np.array([tweet.id for tweet in finalList1])
data['Date'] = np.array([tweet.created_at for tweet in finalList1])
data['Source'] = np.array([tweet.source for tweet in finalList1])
data['Likes'] = np.array([tweet.favorite_count for tweet in finalList1])
data['RTs'] = np.array([tweet.retweet_count for tweet in finalList1])
#Sentiment Analysis
from textblob import TextBlob
import re
# Compiled once at import time. Raw string so that \w, \S and \t reach the
# regex engine instead of being treated as (invalid) string escapes.
# Alternatives, in order: @mentions (underscores allowed in the handle),
# any character that is not an ASCII letter, digit, space or tab, and URLs.
_TWEET_NOISE_RE = re.compile(r"@[A-Za-z0-9_]+|[^0-9A-Za-z \t]|\w+://\S+")


def clean_tweet(tweet):
    '''
    Utility function to clean the text in a tweet by removing
    links and special characters using regex.

    Every @mention, URL and character outside ASCII letters, digits,
    space and tab is replaced by a space; runs of whitespace are then
    collapsed to single spaces and the ends are trimmed.

    tweet: the raw tweet text (str).
    Returns the cleaned text (str); empty string for empty input.
    '''
    return ' '.join(_TWEET_NOISE_RE.sub(" ", tweet).split())
def analize_sentiment(tweet):
    '''
    Classify a tweet's sentiment from its TextBlob polarity.

    The text is first passed through clean_tweet(). Returns 1 when the
    polarity is above zero, 0 when it is exactly zero, and -1 otherwise.
    '''
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 1
    if polarity == 0:
        return 0
    return -1
# Label every tweet's text with analize_sentiment() and store the result in 'SA'.
data['SA'] = np.array(list(map(analize_sentiment, data['Tweets'])))
The code works perfectly fine. However, I have 2 questions:
- How do I get access to older tweets? The code returns only the most recent 3200 tweets; how do I get the ones before those?
- How do I get only the tweets by Donald Trump that contain specific keywords such as 'immigration', 'refugee', 'china', etc.?
I have been trying to figure out a way to do this, but without success.