import pandas as pd
import numpy as np
from fuzzywuzzy import fuzz,process
def _is_nan(value):
    """Return True when ``value`` is a float NaN."""
    # NaN is the only float that compares unequal to itself.
    return isinstance(value, float) and value != value


def match(x, y, min_score=0):
    """Return the best fuzzy match for ``x`` among the candidates in ``y``.

    Every candidate is scored with ``fuzz.ratio``. The winner is the
    candidate with the highest score that is strictly greater than
    ``min_score``; when several candidates tie, the earliest one is kept.

    NaN values are not text, so they are never handed to ``fuzz.ratio``:
    a NaN ``x`` produces the "no match" result and NaN candidates are
    skipped.

    Args:
        x: The value to look up.
        y: An iterable of candidate values.
        min_score: Exclusive lower bound a score must exceed to count.

    Returns:
        A ``(score, text)`` tuple for the best candidate, or ``(-1, '')``
        when no candidate qualifies.
    """
    # -1 / '' signal that no candidate qualified.
    max_score = -1
    max_text = ''

    if _is_nan(x):
        return (max_score, max_text)

    for row2 in y:
        if _is_nan(row2):
            continue
        score = fuzz.ratio(x, row2)
        # Accept only scores above the threshold that also beat the best so far.
        if score > min_score and score > max_score:
            max_score = score
            max_text = row2

    return (max_score, max_text)
# Cap how many columns pandas prints when a DataFrame is displayed.
pd.options.display.max_columns = 10

# Load only the third column (zero-based position 2) from each workbook;
# cells containing the text 'NA' are read as missing values.
# NOTE(review): 'Sheet_name', 'col_name' and 'col2_name' look like
# placeholders -- confirm they match the real workbooks.
wb1 = pd.read_excel(
    'Excel1.xlsx',
    'Sheet_name',
    na_values=['NA'],
    usecols=[2],
)
wb2 = pd.read_excel(
    'Excel2.xlsx',
    'Sheet_name',
    na_values=['NA'],
    usecols=[2],
)

# Put the two columns side by side and add a zero-filled float column
# called "match" that receives the best score for each row.
diff = pd.concat([wb1, wb2], axis=1)
diff['match'] = np.zeros(len(diff))

# Column positions inside `diff` (assuming neither source column is itself
# named "match"): 0 = wb1 column, 1 = wb2 column, 2 = "match".
_MATCH_TEXT_POS = 1
_MATCH_SCORE_POS = 2

for row_idx, query in enumerate(wb1['col_name']):
    best_score, best_text = match(query, wb2['col2_name'])
    print(best_score)
    # The wb2 column is overwritten with the closest wb2 entry for this row.
    diff.iloc[row_idx, _MATCH_TEXT_POS] = best_text
    diff.iloc[row_idx, _MATCH_SCORE_POS] = best_score

diff.to_excel("output.xlsx")