The code below is very inefficient given large matrices. Is there a better way to implement this ?
I have already searched the web for this here.
import numpy as np
def cosine_similarity(x, y):
return np.dot(x, y) / (np.sqrt(np.dot(x, x)) * np.sqrt(np.dot(y, y)))
def compare(a, b):
c = np.zeros((a.shape[0], b.shape[0]))
for i, ai in enumerate(a):
for j, bj in enumerate(b):
c[i, j] = cosine_similarity(ai, bj)
return c
a = np.random.rand(100,2000)
b = np.random.rand(800,2000)
compare(a,b) # shape -> (100, 800)