I have a number of Twitter JSON files that I want to search for keywords, saving the tweets that contain those keywords to a CSV file. To get rid of the blank lines that keep showing up in the output, I've tried suggestions from similar posts such as How to delete all blank lines in the file with the help of python?, but the blank lines still appear in my CSV file. Would anyone know what the problem is?
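Following that post, the code below skips input lines that are empty after stripping trailing whitespace. A minimal sketch of just that check, using made-up input lines:

# made-up sample input lines: one JSON line, one empty line, one whitespace-only line
lines = ['{"text": "a test tweet"}\n', '\n', '   \n']
kept = [line for line in lines if line.rstrip()]   # line.rstrip() is falsy for blank lines, so they are dropped
print kept                                          # only the JSON line survives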
import re
import json
from csv import writer

filenames = ["8may.json", "9may.json"]
open_files = map(open, filenames)

# keywords to filter on; note that keywords should be in all lowercase
## change this to the keywords you want to use
keywords = ["test", "testing", "tester"]

# lists that collect the matching tweets and the fields pulled out of them
tweets = []
ids, texts, time_created, retweet_counts, in_reply_to_screen_name = [], [], [], [], []
geos, coordinates, places, places_country, lang = [], [], [], [], []
user_screen_names, user_followers_count, user_friends_count, user_statuses_count, user_locations = [], [], [], [], []

# iterate through the files and do keyword matching; a tweet is only kept if tweet["text"] matches one of the keywords
for file in open_files:
    for line in file:
        if line.rstrip():
            try:
                # search the raw line (lowercased) for any of the keywords as whole words
                if re.findall(r'\b(%s)\b' % '|'.join(keywords), str(line).lower()):
                    tweets.append(json.loads(line))
            except ValueError:
                # skip lines that are not valid JSON
                pass
# pull the fields of interest out of each matching tweet
for tweet in tweets:
    ids.append(tweet["id_str"])
    texts.append(tweet["text"])
    time_created.append(tweet["created_at"])
    retweet_counts.append(tweet["retweet_count"])
    in_reply_to_screen_name.append(tweet["in_reply_to_screen_name"])
    geos.append(tweet["geo"])
    coordinates.append(tweet["coordinates"])
    places.append(tweet["place"])
    # if there is no place data, record "None"
    try:
        places_country.append(tweet["place"]["country"])
    except (KeyError, TypeError):
        places_country.append("None")
    lang.append(tweet["lang"])
    user_screen_names.append(tweet["user"]["screen_name"])
    user_followers_count.append(tweet["user"]["followers_count"])
    user_friends_count.append(tweet["user"]["friends_count"])
    user_statuses_count.append(tweet["user"]["statuses_count"])
    user_locations.append(tweet["user"]["location"])  # the user's self-reported location
# "out" is the file handle for the output csv (opened earlier in the script)
# write the header row, then one csv row per tweet
print >> out, "ids,text,time_created,retweet_counts,in_reply_to,geos,coordinates,places,country,language,screen_name,followers,friends,statuses,locations"
rows = zip(ids, texts, time_created, retweet_counts, in_reply_to_screen_name, geos, coordinates, places, places_country, lang, user_screen_names, user_followers_count, user_friends_count, user_statuses_count, user_locations)
csv = writer(out)
for row in rows:
    # encode unicode values as utf-8 so the csv writer can handle them
    values = [(value.encode('utf8') if hasattr(value, 'encode') else value) for value in row]
    csv.writerow(values)
out.close()
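For reference, the keyword filter above is meant to do whole-word matching on the lowercased raw JSON line, roughly like this (a sketch using a made-up line; the real lines come from the files listed above):

import re

keywords = ["test", "testing", "tester"]
pattern = r'\b(%s)\b' % '|'.join(keywords)   # matches any keyword as a whole word

sample = '{"text": "This is a TEST tweet", "lang": "en"}'   # made-up JSON line for illustration
print re.findall(pattern, sample.lower())                    # ['test'] -> this tweet would be kept
print re.findall(pattern, '{"text": "no match here"}')       # []       -> this tweet would be skipped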