I am calculating the edit distance between two DataFrames. Each DataFrame has ~30 lakh (about 3 million) rows, and because the data is so large the computation takes a long time. Is there any way to improve the performance?
# For every target, pick the closest candidate (by edit distance) among the
# candidates that share the target's key and whose length is within +/-10 of
# the target's length, then record one result row per target.
#
# NOTE(review): ``Series.str.edit_distance`` is not part of stock pandas; it is
# assumed to be provided by the library backing ``sr1`` (cuDF exposes a string
# method with this name) -- confirm.
# NOTE(review): ``targets`` / ``sr`` are presumably the original strings aligned
# label-for-label with ``targets1`` / ``sr1`` -- confirm where they are defined.
# NOTE(review): the boolean filter below rescans all of ``sr2`` / ``len_sr2``
# for every target; with millions of rows on both sides that scan dominates
# the runtime. Pre-grouping the candidates by key is the next optimisation.
total = len(targets1)

# Rows are collected as plain dicts and attached to ``output_final`` once after
# the loop. Appending to the DataFrame per row copies the whole frame each
# time, and ``DataFrame.append`` no longer exists in pandas >= 2.0.
rows = []

for i in range(total):
    if i % 100 == 0:
        pct = (i / total) * 100
        print("(" + str(dt.datetime.now()) + ") completed: " + str(round(pct, 2)) + "%")

    target_len = len_targets2[i]
    candidates = sr1[
        (sr2 == targets2[i])
        & (len_sr2 >= (target_len - 10))
        & (len_sr2 <= (target_len + 10))
    ]

    if len(candidates) > 0:
        # Sorted ascending, so the first entry is the smallest distance.
        distances = candidates.str.edit_distance(targets1[i]).sort_values()
        best_label = distances.index[0]
        score = distances.iloc[0]
        matched = sr[best_label]
        combined_len = len(matched) + len(targets[i])
        rows.append({
            'Name': targets[i],
            'Matched_Name': matched,
            'score': score,
            # Similarity in percent: 100 when the distance is 0.
            'score_final': (combined_len - score) / combined_len * 100,
        })
    else:
        # No candidate passed the key/length filter for this target.
        rows.append({
            'Name': targets[i],
            'Matched_Name': '',
            'Matched_REF': "0",
            'score': 0,
            'score_final': 0,
        })

if rows:
    output_final = pd.concat([output_final, pd.DataFrame(rows)], ignore_index=True)
# Small sample of the inputs consumed by the matching loop.
# ``targets1`` holds the strings to match; ``sr1`` holds the candidate strings.
targets1 = pd.Series([
    'ABBSHHCH',
    'ABBSAJSJAHDKAJKJ',
    'BASJBASJASH',
    'KJSAKASJAS',
    'KJSAIUBDAKS',
    'KAJSNDSAX',
    'JASANXAJSKJ',
    'NASNXHY',
    'AIUSSHXBAHSJASHJ',
])
sr1 = pd.Series([
    'ABBSHHSJAKX',
    'ABBMNASASJKKLASAHDKAJKJ',
    'BASSAMSAJASH',
    'KJSMSANMAASJAS',
    'KJSSMNASBDAKS',
    'KASKJADSAX',
    'JASAKJKJSKJ',
    'NASAKXHY',
    'AIUSSANMASSJASHJ',
    'NSAASJNCXA',
    'ABBSASMNKAJKJ',
    'ASNASNXJASH',
    'KJSKJSAKSJAS',
    'KJASKJSDAKS',
    'KAJSAKJSAX',
    'JAKJASXAJSKJ',
    'NADADHY',
    'AIUSNASSASJASHJ',
])

# Key used to pair targets with candidates. In this sample it is the first two
# characters of each string, so it is derived rather than typed out by hand.
targets2 = targets1.str[:2]
sr2 = sr1.str[:2]

# String lengths, compared by the loop's +/-10 length window. Derived from the
# strings so they cannot drift out of sync with them.
len_targets2 = targets1.str.len()
len_sr2 = sr1.str.len()