I have a script, taking links from a file, visiting it, getting re-directed links, storing it back. But it works too slow on a file with 15k records. How can I make it quick? already used threading Please do help to fix it out!, I've tried multiple ways, threadings but I cannot make it quick. Is there any solution to my problem by any chance? any expert who could help me out.
import concurrent.futures
import sys
import pandas as pd
import requests
from threading import Thread
from queue import Queue
out_put_file=""
linkes = None
out = []
urls = []
old = []
file_name =None
concurrent = 10000
q = None
count=0
df =None
def do_work():
while True:
global q
url = q.get()
res = get_status(url)
q.task_done()
def get_status(o_url):
try:
res = requests.get(o_url)
if res:
out.append(res.url)
old.append(o_url)
print(count)
count=count+1
return [res.status_code,res.url ,o_url]
except:
pass
return [ans.status_code,ans.url,url]
def process_data():
global q
global file_name
global linkes
global df
file_name = input("Enter file name : ")
file_name = file_name.strip()
print("Generating .......")
df = pd.read_csv(file_name+".csv")
old_links =df["shopify"]
for i in old_links:
if type(i)!=str:
urls.append(i)
continue
if not i.startswith("http"):
linkes = "http://"+i
urls.append(linkes)
else:
urls.append(i)
df["shopify"]=urls
q = Queue(concurrent * 2)
for i in range(concurrent):
t = Thread(target=do_work)
t.daemon = True
t.start()
try:
for url in urls:
if type(url)!=str:
continue
q.put(url.strip())
q.join()
except KeyboardInterrupt:
sys.exit(1)
process_data()
for i in range (len(df['shopify'])):
for j in range(len(old)):
if df['shopify'][i]==old[j]:
df['shopify'][i]=out[j]
df = df[~df['shopify'].astype(str).str.startswith('http:')]
df = df.dropna()
df.to_csv(file_name+"-new.csv",index=False)
Email,shopify,Proofy_Status_Name
hello@knobblystudio.com,http://puravidabracelets.myshopify.com,Deliverable
service@cafe-select.co.uk,cafe-select.co.uk,Deliverable
mtafich@gmail.com,,Deliverable
whoopies@stevessnacks.com,stevessnacks.com,Deliverable
customerservice@runwayriches.com,runwayriches.com,Deliverable
shop@blackdogride.com.au,blackdogride.com.au,Deliverable
anavasconcelos.nica@gmail.com,grass4you.com,Deliverable
info@prideandprestigehair.com,prideandprestigehair.com,Deliverable
info@dancinwoofs.com,dancinwoofs.com,Deliverable