This is a logical question, no programming syntax help needed, just the big picture stuff:
What I currently have
I wrote a Python script that downloads the "Who's Online" list of players of a game. The list is posted on a website. Python downloads the list, removes all the extra HTML and leaves just a pretty list of users, one per line.
It then saves this list to a file. Every 1 minute it downloads the newest "Who's Online" list from the game website, and it then checks the new list against the old list.
If there is a new user that it didn't see before, it adds it to the list, but doesn't remove anyone. So the list continues to grow for 1hour, then python renames the list to "Users_hour_log_timestamp" and starts fresh.
It also creates a list of users for a Day_log, week_log, month_log etc.
Python code
# IMPORT WEB BROWSER
import mechanize
# IMPORT HTML PARSER
from bs4 import BeautifulSoup, NavigableString, Tag
# FOR TIME CALCS
from datetime import datetime, timedelta, date
# FOR RENAMING FILES
import os
# FOR SLEEP
import time
# FOR 1 MONTH FROM NOW CALCULATOR
from dateutil.relativedelta import relativedelta
# CALC ONE MONTH FROM TODAY
ONE_MONTH_LATER = date.today() + relativedelta(months=+1)
# START WEEK TIMER
START_TIME_WEEK = datetime.now()
# SET VARIABLE TO DETERMINE FIRST TIME RUNNING A DAY DUMP
FIRST_TIME = 'yes'
# TIME DELTAS
HOURS_DAY = timedelta(hours=24)
HOURS_WEEK = timedelta(hours=168)
# NEVER ENDING LOOP THAT SLEEPS 1 MINUTE BETWEEN CYCLES
while True:
# EVERY MINUTE, UPDATE WHAT TODAY IS
today = datetime.today()
# DATE TIME FOR TIMESTAMPING LOGS
dt = today.strftime('%Y-%m-%d_%H.%M.%S_')
# THIS CHECKS TODAYS DATE TO BE COMPARED TO THE ONE_MONTH_LATER VAR
MONTH_TODAY = today.strftime('%Y-%m-%d')
# GET CURRENT TIME
CURRENT_TIME = datetime.now()
# USE CURRENT TIME TO CALCULATE IF 1 WEEK HAS PASSED
TIME_PASSED_WEEK = CURRENT_TIME - START_TIME_WEEK
# GET THE CURRENT HOUR-MINUTE, THIS IS USED TO DUMP DAY LOGS AT 11:59 PM (23-59)
TIME_HOUR = today.strftime('%H-%M')
# GET CURRENT MINUTE, THIS IS USED TO DUMP HOUR LOGS AT 59 (END OF EACH HOUR)
TIME_MINUTE = today.strftime('%M')
print "START of LOOP FIRST TIME = %s" % FIRST_TIME
# IF IT IS MIN 59 OF ANY HOUR, DUMP THE HOUR LOG
if TIME_MINUTE == '59':
newname = dt + 'WAR2RU_USERS_HOUR_LOG.txt'
os.rename('users-all-hour.txt', newname)
# CLEAR THE LOG TO START NEW HOUR LOG
clear = open("users-all-hour.txt", "w")
clear.close()
print "HOUR LOG DUMPED"
# IF ITS THE FIRST TIME RUNNING, DAY LOG HASNT DUMPED, RESET WEEK TIMER AT MIDNIGHT AND DUMP DAY LOG
if TIME_HOUR == '23-59' and FIRST_TIME == 'yes':
print "First time day dump:"
# RENAME CURRENT DAY LOG TO TIME STAMPED DAY LOG
newname = dt + 'WAR2RU_USERS_DAY_LOG.txt'
print "newname = %s" % newname
os.rename('users-all-day.txt', newname)
# CLEAR THE LOG TO START NEW DAY
clear = open("users-all-day.txt", "w")
clear.close()
print "DAY LOG DUMPED"
# RESET VAR
FIRST_TIME = 'no'
# RESET THE WEEK LOG
clear = open("users-all-week.txt", "w")
clear.close
# RESET THE DAY LOG
clear = open("users-all-day.txt", "w")
clear.close()
# START THE WEEK TIMER SO IT BEGINS A NEW WEEK
START_TIME_WEEK = datetime.now()
# IF ITS 12am, NOT FIRST TIME RUNNING, AND DAY LOG HASNT BEEN DUMPED YET
if TIME_HOUR == '23-59'and FIRST_TIME == 'no':
print "new day, first time is NO"
newname = dt + 'WAR2RU_USERS_DAY_LOG.txt'
os.rename('users-all-day.txt', newname)
# CLEAR THE LOG TO START NEW DAY
clear = open("users-all-day.txt", "w")
clear.close()
print "DAY LOG DUMPED"
# RESET THE FIRST_TIME VAR
FIRST_TIME = 'no'
print "END of LOOP FIRST TIME = %s" % FIRST_TIME
if TIME_PASSED_WEEK > HOURS_WEEK:
newname = dt + 'WAR2RU_USERS_WEEK_LOG.txt'
os.rename('users-all-week.txt', newname)
# CLEAR THE LOG TO START NEW DAY
clear = open("users-all-week.txt", "w")
clear.close()
print "WEEK LOG DUMPED"
START_TIME_WEEK = datetime.now()
if MONTH_TODAY == ONE_MONTH_LATER:
newname = dt + '_WAR2RU_USERS_MONTH_LOG.txt'
os.rename('users-all-month.txt', newname)
# CLEAR THE LOG TO START NEW DAY
clear = open("users-all-month.txt", "w")
clear.close()
print "MONTH LOG DUMPED"
ONE_MONTH_LATER = date.today() + relativedelta(months=+1)
br = mechanize.Browser()
# Browser options
br.set_handle_equiv(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
# Follows refresh 0 but not hangs on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
# Want debugging messages?
# br.set_debug_http(True)
# br.set_debug_redirects(True)
# br.set_debug_responses(True)
try:
# User-Agent
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko')]
r = br.open('http://en.war2.ru/wp-content/themes/war2.ru/lib/server-stats.php')
page = r.read()
soup = BeautifulSoup(page)
user_list = soup.find_all('div', class_='status-list')
except:
print "ERROR: Connection failed. Retrying ..."
continue
############### DAY LOG #######################
# START WITH ALL USERS LIST, ADD CURRENT USERS, THEN DE DUP OVER TO ALL USERS LIST
with open("users-all-day.txt", "r") as f:
lines = f.readlines()
with open("users-this-minute-day.txt", "w") as f1:
f1.writelines(lines)
f1.close()
f.close()
with open("users-all-week.txt", "r") as f:
lines = f.readlines()
with open("users-this-minute-week.txt", "w") as f1:
f1.writelines(lines)
f1.close()
f.close()
with open("users-all-month.txt", "r") as f:
lines = f.readlines()
with open("users-this-minute-month.txt", "w") as f1:
f1.writelines(lines)
f1.close()
f.close()
with open("users-all.txt", "r") as f:
lines = f.readlines()
with open("users-this-minute-all.txt", "w") as f1:
f1.writelines(lines)
f1.close()
f.close()
with open("users-all-hour.txt", "r") as f:
lines = f.readlines()
with open("users-this-minute-hour.txt", "w") as f1:
f1.writelines(lines)
f1.close()
f.close()
# GET USERS FROM RU SITE
for br in user_list:
users = br.findAll('br')
for br in users:
next = br.nextSibling
if not (next and isinstance(next,NavigableString)):
continue
next2 = next.nextSibling
if next2 and isinstance(next2,Tag) and next2.name == 'br':
text = str(next.encode('latin1')).strip()
if text:
output = next[1:].encode('latin1')
# ADD A USER TO users-this-minute-day.txt
file = open("users-this-minute-day.txt", "at")
print >>file, str(output)
file.close
# ADD A USER TO users-this-minute-day.txt
file = open("users-this-minute-week.txt", "at")
print >>file, str(output)
file.close
# ADD A USER TO users-this-minute-day.txt
file = open("users-this-minute-month.txt", "at")
print >>file, str(output)
file.close
# ADD A USER TO users-this-minute-day.txt
file = open("users-this-minute-all.txt", "at")
print >>file, str(output)
file.close
# ADD A USER TO users-this-minute-hour.txt
file = open("users-this-minute-hour.txt", "at")
print >>file, str(output)
file.close
lines_seen = set() # holds lines already seen
outfile = open("users-all-day.txt", "w")
for line in open("users-this-minute-day.txt", "r"):
if line not in lines_seen: # not a duplicate
lines_seen.add(line)
outfile.writelines(sorted(lines_seen))
outfile.close()
lines_seen = set() # holds lines already seen
outfile = open("users-all-week.txt", "w")
for line in open("users-this-minute-week.txt", "r"):
if line not in lines_seen: # not a duplicate
lines_seen.add(line)
outfile.writelines(sorted(lines_seen))
outfile.close()
lines_seen = set() # holds lines already seen
outfile = open("users-all-month.txt", "w")
for line in open("users-this-minute-month.txt", "r"):
if line not in lines_seen: # not a duplicate
lines_seen.add(line)
outfile.writelines(sorted(lines_seen))
outfile.close()
lines_seen = set() # holds lines already seen
outfile = open("users-all.txt", "w")
for line in open("users-this-minute-all.txt", "r"):
if line not in lines_seen: # not a duplicate
lines_seen.add(line)
outfile.writelines(sorted(lines_seen))
outfile.close()
lines_seen = set() # holds lines already seen
outfile = open("users-all-hour.txt", "w")
for line in open("users-this-minute-hour.txt", "r"):
if line not in lines_seen: # not a duplicate
lines_seen.add(line)
outfile.writelines(sorted(lines_seen))
outfile.close()
print "Sleeping for 1 minute ..."
time.sleep(60)
Now this is all nice for manually searching through and creating a big list of game usernames, but now I want it to be more sophisticated.
Desired Functionality
These are the kind of questions I need a database to be able to answer:
BE ABLE TO ANSWER A QUESTION LIKE "Was Player X online from 5pm-8pm on Nov 22 2015?"
BE ABLE TO DRAW A CHART OF THE TIME OF DAY/DAY OF WEEK WITH MOST PLAYERS ONLINE
BE ABLE TO HAVE A CALENDAR FOR EACH PLAYER SHOWING WHAT DAYS/HOURS THAT MONTH THAT THEY PLAYED
BE ABLE TO HAVE A LEADERBOARD OF PLAYERS ONLINE THE MOST
In order to do this I know I need to start with a database and stop using text files.
That is where the problem is for me. I cannot get my head around what a database for this would look like.
It would obviously have a column for each user, but then how would I keep track of what minutes, hours, and days that user was online?
I have completely rewritten this because I am trying to convey this idea so that others can understand. Please let me know what you think
Thanks!