Honestly, I was not even sure what to title this question. I am trying to loop through a large list of URLs while processing only 20 URLs at a time (20 is the number of proxies I have). I also need to keep cycling through the proxy list as I process the URLs. For example, it would start with the 1st URL and the 1st proxy, and once it reaches the 21st URL it would use the 1st proxy again. My rough attempt is below; if anyone can point me in the right direction, it would be much appreciated.
import pymysql.cursors
from multiprocessing import Pool
from fake_useragent import UserAgent
def worker(args):
    """Print every field of one task tuple, followed by a separator line.

    Args:
        args: A 7-item sequence, unpacked in this order: the shared static
            value, the row id, the row name, the row content, the proxy
            assigned to this row, the request headers, and the connection
            slot. Any other length raises ``ValueError`` on unpacking.

    Returns:
        None. The function only writes to standard output.
    """
    # ``row_id`` rather than ``id`` so the builtin ``id()`` is not shadowed.
    var_a, row_id, name, content, proxy, headers, connection = args
    for field in (var_a, row_id, name, content, proxy, headers, connection):
        print(field)
    print('---------------------------')
if __name__ == '__main__':
    # Script entry point: read the rows to process from MySQL, pair each row
    # with a proxy in round-robin order, and fan the work out to a pool of
    # worker processes.
    connection = pymysql.connect(
        host='host ',
        user='user',
        password='password',
        db='db',
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )

    ua = UserAgent()
    headers = {'User-Agent': ua.chrome}

    proxies = [
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
        'xxx.xxx.xxx.xxx:xxxxx',
    ]

    # Fetch every row up front and release the connection before any worker
    # process is started; the parent does not need it after this point.
    try:
        with connection.cursor() as cursor:
            sql = "SELECT id,name,content FROM table"
            cursor.execute(sql)
            urls = cursor.fetchall()
    finally:
        connection.close()

    var_a = 'static'

    # Row i gets proxy i % len(proxies), so the proxy list wraps around once
    # it is exhausted. The last slot (the connection) is None: a live
    # connection wraps a socket and cannot be pickled into a task for another
    # process, so a worker that needs the database must open its own.
    data = (
        (var_a, url['id'], url['name'], url['content'],
         proxies[i % len(proxies)], headers, None)
        for i, url in enumerate(urls)
    )

    # One process per proxy keeps the pool size in step with the proxy list
    # instead of a hard-coded count.
    proc_num = len(proxies)
    with Pool(processes=proc_num) as p:
        # Drain the iterator: this waits for every task to finish before the
        # pool is torn down and re-raises any exception raised in a worker.
        for _ in p.imap(worker, data):
            pass