It takes about 15 minutes for roughly 300 threads to start up (each one prints 'initiating with proxy: ...'); however, if I remove all of the code inside the while loop in the threads' run method, every thread starts within about 10 seconds, maybe less. Any ideas what in the while loop is making the threads initiate so slowly? (I've also put a stripped-down sketch of the same startup pattern below the full script.)
#!/usr/bin/env python
import requests
import sys
import string
import os.path
import urllib.request
import threading
import mimetypes
from time import gmtime, strftime, sleep
from random import choice
#list of our proxies
proxies = []
working = []
downloads = 1
initiated = 0
#the number of files we want to download
target = int(sys.argv[1])
#argument 2 - proxies
try:
    sys.argv[2]
except:
    pass
else:
    param = sys.argv[2]
    if param.find('.txt') != -1:
        print('Loading specified proxy list ('+ param +').')
        f = open(param, 'r+')
        print('Opening '+ f.name)
        proxylist = f.read()
        f.close()
        #split retrieved list by new line
        proxies = proxylist.split('\n')
    else:
        print('Single proxy specified.')
        proxies.append(param)
class thread(threading.Thread):
    def __init__(self, ID, name, proxy):
        threading.Thread.__init__(self)
        self.id = ID
        self.name = name
        self.downloads = 0
        self.proxy = proxy
        self.running = True
        self.fails = 0

    def run(self):
        global downloads
        global working
        global initiated
        initiated += 1
        if self.proxy != False:
            #id is always above one, so make the ID -1
            self.proxy = proxies[(self.id-1)]
            print(self.name +' initiating with proxy: '+self.proxy)
        else:
            print(self.name +' initiating without a proxy.')
        #start actual downloads
        while downloads <= target and self.running:
            #wait for all threads to be loaded before starting requests
            if (initiated-1) == len(proxies):
                rstr = ''.join(choice(string.ascii_letters + string.digits) for x in range(5))
                url = 'http://puu.sh/'+rstr
                filename = 'downloaded/'+ strftime('%Y %m %d %H-%M-%S ['+ rstr +']', gmtime()) +'.png'
                try:
                    if self.proxy != False:
                        #make our requests go through proxy
                        r = requests.get(url, proxies={'http': self.proxy})
                    else:
                        r = requests.get(url)
                except IOError:
                    if self.fails >= 10:
                        #print(self.name +': Proxy is not working. Stopping thread.')
                        self.running = False
                    self.fails += 1
                except:
                    pass
                else:
                    if r.status_code == 200 and r.headers['Content-Type'] != 'text/html':
                        with open(filename, 'wb') as f:
                            for chunk in r.iter_content(1024):
                                f.write(chunk)
                        print(self.name +': '+ filename+' downloaded...' + str(downloads))
                        downloads += 1
                        self.downloads += 1
                        if not self.proxy in working and self.proxy != False:
                            working.append(self.proxy)
                    sleep(5)
#lets create the "downloaded" folder if it does not exist
if not os.path.isdir('downloaded'):
    try:
        os.mkdir('downloaded')
    except:
        pass
#thread count
thread_count = 1
#create threads, and initiate them
try:
    thread(0, 'Thread-main', False).start()
    for x in proxies:
        thread(thread_count, 'Thread-'+str(thread_count), proxies[(thread_count-1)]).start()
        thread_count += 1
except:
    print('Couldn\'t start threads.')
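In case it helps to see the pattern in isolation, here is a stripped-down sketch of the same startup structure, not the exact script above: the thread count, the target, and the sleep standing in for the HTTP request are made up for illustration.

import threading
from time import sleep

NUM_THREADS = 300   # made-up count, roughly matching my real run
TARGET = 10         # stand-in for the real download target
initiated = 0       # shared counter, incremented once per thread, as above
downloads = 1

class Worker(threading.Thread):
    def run(self):
        global initiated, downloads
        initiated += 1
        print(self.name + ' initiating')
        # same shape as the real run(): loop until the target is reached,
        # and only do work once every thread has been created
        while downloads <= TARGET:
            if initiated == NUM_THREADS:
                sleep(5)        # stands in for the requests.get() call
                downloads += 1  # unsynchronized, same as in the script above

for i in range(NUM_THREADS):
    Worker(name='Thread-' + str(i)).start()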