I have the following code:
import os
from ghost import Ghost
import urlparse, urllib
import SimpleHTTPServer
import SocketServer
import sys, traceback
from threading import Thread, Event
from time import sleep
please_die = Event() # this is my enemy
httpd = None
PORT = 8001
address = 'http://localhost:'+str(PORT)+'/'
search_dir = './category'
def main():
"""
basic run script routine,
FIXME: is supossed to exits gracefully
"""
thread = Thread(target = simpleServe)
try:
thread.start()
run()
except KeyboardInterrupt:
print "Shutdown requested"
except Exception:
traceback.print_exc(file=sys.stdout)
shutdown()
sys.exit(0)
def shutdown():
global httpd
global please_die
print "Shutting down"
# A try - except for the shutdown routine
try:
please_die.wait() # how do you do?
httpd.shutdown() # Please! I whant to run you multiple times.
print "Have you died?"
except Exception:
traceback.print_exc(file=sys.stdout)
def path2url(path):
"""
constructs an url from a relative path / concatenates the global address
variable with the path given
"""
global address
return urlparse.urljoin(address, urllib.pathname2url(path))
def simpleServe():
global httpd, PORT
please_die.set() # Attaching the event to this thread
# Start the service
Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
httpd = SocketServer.TCPServer(("", PORT), Handler)
print "serving at port", PORT
# And loop infinetly in the hope that I can stop you later
httpd.serve_forever()
def run():
global search_dir;
ghost = Ghost() # the webkit facade
with ghost.start() as session:
session.set_viewport_size(2560, 1600) # "retina" size
for directory, subdirectories, files in os.walk(search_dir):
for file in files:
path = os.path.join(directory, file)
urlPath = path2url(path)
process(session, urlPath);
def process(session, urlPath):
page, resources = session.open(urlPath)
assert page.http_status == 200
# ... other asserts here
if __name__ == '__main__':
main()
The idea is to make a script that starts a "simple http server", do some requests on it and then exit.
First time it runs without any problems:
...
127.0.0.1 - - [31/Jul/2015 13:16:17] "GET /category/52003.html HTTP/1.1" 200 -
127.0.0.1 - - [31/Jul/2015 13:16:17] "GET /category/52003.html HTTP/1.1" 200 -
127.0.0.1 - - [31/Jul/2015 13:16:17] "GET /category/52003.html HTTP/1.1" 200 -
127.0.0.1 - - [31/Jul/2015 13:16:17] "GET /static/img/glyphicons-halflings.png HTTP/1.1" 200 -
Shutting down
Have you died?
Launching it the second time crashes saying that the:
Address already in use
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 763, in run
self.__target(*self.__args, **self.__kwargs)
File "download-images.py", line 51, in simpleServe
httpd = SocketServer.TCPServer(("", PORT), Handler)
File "/usr/lib/python2.7/SocketServer.py", line 420, in __init__
self.server_bind()
File "/usr/lib/python2.7/SocketServer.py", line 434, in server_bind
self.socket.bind(self.server_address)
File "/usr/lib/python2.7/socket.py", line 228, in meth
return getattr(self._sock,name)(*args)
error: [Errno 98] Address already in use
If I kill all python processes than the script runs again, and because of that I'm assuming that I used the thread wrong, but I cannot find where.
Update
Forgot to mention that,
my OS is :
$ lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 15.04
Release: 15.04
Codename: vivid
The python that I'm using is :
$ python --version
Python 2.7.9
$ netstat -putelan | grep 8001 prints :
$ netstat -putelan | grep 8001
(Not all processes could be identified, non-owned process info
cp 0 0 127.0.0.1:34691 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:8001 127.0.0.1:34866 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34798 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:8001 127.0.0.1:34588 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34647 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34915 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34674 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34451 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:8001 127.0.0.1:34930 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:8001 127.0.0.1:34606 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34505 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:34717 127.0.0.1:8001 TIME_WAIT 0 0 -
tcp 0 0 127.0.0.1:8001 127.0.0.1:34670 0 0 127.0.0.1:8001 127.0.0.1:34626
...
I can't post the whole sequence (due to the post limits of stackoverflow). The rest is the same with 34*** port mixed with 8001 port in an uniform sequence.