I am attempting to stream tweets in a PostgreSQL database with PostGIS extension using a Python code written in Jupyter Notebook with no success. I have used a lot of tutorials as reference and in the first attempts the code seemed to work and there were no mistakes. I even had printed the message that I was connected to Twitter API. However, no tweets were uploaded into the PostgreSQL database. I thought the problem could be the filters (since maybe I was using filters which had no tweets for the moment), but after some runs removing filters or using other filters I found that wasn't the problem. I think the connection to PostgreSQL isn't the problem neither since I tried printing the tweets directly into Jupyter Notebook and there was no mistake and no error.
After doing some changes basing myself into guides and checking the format of PostgreSQL tables, I see the code connects to the Twitter API but I get this message all the time: 'str' object is not callable
The PostgreSQL table is created using the following code, with the goal that the coordinates of the tweets are stored with point geometry:
CREATE TABLE tweets (tweet_id VARCHAR PRIMARY KEY, user_id VARCHAR, username TEXT, tweet TEXT, hashtags TEXT, lang TEXT, created_at TIMESTAMP, coordinates GEOMETRY);
The used Python code is the next one:
#!/usr/bin/env python
# coding: utf-8
#Import libraries
import tweepy
import pandas as pd
import json
import psycopg2
import time
from html.parser import HTMLParser
#Insert Twitter keys
ckey = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
csecret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
atoken = "xxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
asecret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
#Authorize the Twitter API
auth = tweepy.OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
#Call the Twitter API
api = tweepy.API(auth)
#Define Listener block
class MyStreamListener(tweepy.StreamListener):
def __init__(self, time_limit=300):
self.start_time = time.time()
self.limit = time_limit
super(MyStreamListener, self).__init__()
def on_connect(self):
print("Connected to Twitter API.")
def on_status(self, status):
print(status.text)
def on_data(self, raw_data):
try:
datos = json.loads(raw_data)
#Filter only tweets with coordinates
if datos["coordinates"] != None:
#Obtain all the variables to store in each column
tweet_id = datos['id_str']
user_id = datos['user']['id']
user_name = datos['user']['name']
tweet = datos['text']
hashtags = datos["entities"]["hashtags"]
lang = datos['user']['lang']
created_at = datos['created_at']
coordinates = datos["coordinates"]["coordinates"]
# Connect to database
dbConnect(tweet_id, user_id, user_name, tweet, hashtags, lang, created_at, coordinates)
if (time.time() - self.start_time) > self.limit:
print(time.time(), self.start_time, self.limit)
return False
except Exception as e:
print(e)
def on_error(self, status_code):
if status_code == 420:
# Returning False in on_data disconnects the stream
return False
def dbConnect(tweet_id, user_id, user_name, tweet, hashtags, lang, created_at, coordinates):
#Connect to Twitter database created in PostgreSQL
conn = psycopg2.connect(host="localhost",database="datos_twitter",port=5433,user="xxxxxxx",password="xxxxxxx")
#Create a cursor to perform database operations
cur = conn.cursor()
#With the cursor, insert tweets into a PostgreSQL table
command = "INSERT INTO tweets (tweet_id, user_id, user_name, tweet, hashtags, lang, created_at, coordinates) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
cur.execute(command(tweet_id, user_id, user_name, tweet, hashtags, lang, created_at, coordinates))
#Commit changes
conn.commit()
#Close cursor and the connection
cur.close()
conn.close()
#Streaming of tweets
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener, tweet_mode="extended")
#Filtering of tweets by spatial box and keywords
myStream.filter(locations=[-10.78,34.15, 5.95,44.04], track=['Madrid', 'madrid'])