1

Though urls is properly defined, I do keep getting "global name 'urls' is not defined" and the url data is not inserted into MYSQL. Any suggestions on where? I'm making mistake here?

# ! /usr/bin/python

# Description : This script can collect the URLs from Tweets and Records them into research MYSQL DB.

from __future__ import print_function
import tweepy
import json
import MySQLdb
from dateutil import parser

WORDS = ['security']

# CREDENTAILS
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
ACCESS_TOKEN = ""
ACCESS_TOKEN_SECRET = ""

HOST = "192.168.150.94"
USER = "root"
PASSWD = "blah"
DATABASE = "tweets"


def store_data(tweet_url):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
                         charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO  tweet_url (urls) VALUES (%s)"
    cursor.execute(insert_query, (urls))
    db.commit()
    cursor.close()
    db.close()
    return


class StreamListener(tweepy.StreamListener):
    def on_connect(self):
        print("We are now connected to the streaming API.")

    def on_error(self, status_code):
        print('An Error has occured: ' + repr(status_code))
        return False

    def on_data(self, data):
        try:
            datajson = json.loads(data)
            web_url = datajson['entities']['urls']
            print(web_url)
            for i in web_url:
                web_urls = i['expanded_url']
                urls = web_urls
            print(urls)
        store_data(urls)

    except Exception as e:
    print(e)


auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
print("Tracking: " + str(WORDS))
streamer.filter(track=WORDS)
Arun
  • 1,160
  • 3
  • 17
  • 33

2 Answers2

1

You just need to rename the parameter urls in the function store_data to tweet_url

def store_data(tweet_url):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
                         charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO  tweet_url (urls) VALUES (%s)"
    cursor.execute(insert_query, (tweet_url))

The way you want to store data stays unclear. If you call store_data after the loop, it's only storing the last value, you should better store each value in a list:

def on_data(self, data):
    try:
        datajson = json.loads(data)
        web_url = datajson['entities']['urls']
        print(web_url)
        urls = []
        for i in web_url:
            urls.append((i['expanded_url'],)) 
            # stores a tuple to make it easy in the database insertion
        print(urls)
        store_data(urls)
    except:
         [...]

This way need another little fix inside store_data:

def store_data(urls):
    db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
                         charset="utf8")
    cursor = db.cursor()
    insert_query = "INSERT INTO  tweet_url (urls) VALUES (%s)"
    cursor.executemany(insert_query, urls)
    db.commit()
    cursor.close()
    db.close()
    return
PRMoureu
  • 12,817
  • 6
  • 38
  • 48
  • Thank you Moureu , Changed it ! Works fine – Arun Aug 20 '17 at 18:23
  • PRmonreu : But there is a stop here , When there is more than two URLs in the tweet "web_url" , I'm getting "not all arguments converted during string formatting" error. But when there is a single URL present on tweet, That is getting inserted to MYSQL. ? – Arun Aug 20 '17 at 20:13
  • @Arun check this [post](https://stackoverflow.com/questions/14011160/how-to-use-python-mysqldb-to-insert-many-rows-at-once) to understand what could be wrong, in `store_data()`, `urls` needs to be a list of tuples (with one `expanded_url` inside) . `executemany` will iterate over this list. – PRMoureu Aug 20 '17 at 20:26
1

Inside your function store_data() you are using urls which is not defined because what you pass to your function is tweet_url instead.

You need to either change your function argument to urls instead of tweet_url like this:

def store_data(urls):
    # ...

Or change urls to tweet_url in your function body:

# ...
cursor.execute(insert_query, (tweet_url))
# ...

And make sure you fix the indentation inside on_data() method as below:

class StreamListener(tweepy.StreamListener):
    # ...

    def on_data(self, data):
        try:
            datajson = json.loads(data)
            web_url = datajson['entities']['urls']
            print(web_url)
            for i in web_url:
                web_urls = i['expanded_url']
                urls = web_urls
            print(urls)
            store_data(urls)
        except Exception as e:
            print(e)
ettanany
  • 19,038
  • 9
  • 47
  • 63