I want to calculate the geo-distance between latitude-longitude.
I had checked this thread Vectorizing Haversine distance calculation in Python but when I am using it for two different set of coordinates, I m getting an error.
df1 size can be in millions and if there is any other way to calculate accurate geo distance in less time then it would be really helpful.
length1 = 1000
d1 = np.random.uniform(-90, 90, length1)
d2 = np.random.uniform(-180, 180, length1)
length2 = 100
d3 = np.random.uniform(-90, 90, length2)
d4 = np.random.uniform(-180, 180, length2)
coords = tuple(zip(d1, d2))
df1 = pd.DataFrame({'coordinates':coords})
coords = tuple(zip(d3, d4))
df2 = pd.DataFrame({'coordinates':coords})
def get_diff(df1, df2):
data1 = np.array(df1['coordinates'].tolist())
data2 = np.array(df2['coordinates'].tolist())
lat1 = data1[:,0]
lng1 = data1[:,1]
lat2 = data2[:,0]
lng2 = data2[:,1]
#print(lat1.shape)
#print(lng1.shape)
#print(lat2.shape)
#print(lng2.shape)
diff_lat = lat1[:,None] - lat2
diff_lng = lng1[:,None] - lng2
#print(diff_lat.shape)
#print(diff_lng.shape)
d = np.sin(diff_lat/2)**2 + np.cos(lat1[:,None])*np.cos(lat1) * np.sin(diff_lng/2)**2
return 2 * 6371 * np.arcsin(np.sqrt(d))
get_diff(df1, df2)
ValueError Traceback (most recent call last)
<ipython-input-58-df06c7cff72c> in <module>
----> 1 get_diff(df1, df2)
<ipython-input-57-9bd8f10189e6> in get_diff(df1, df2)
26 print(diff_lat.shape)
27 print(diff_lng.shape)
---> 28 d = np.sin(diff_lat/2)**2 + np.cos(lat1[:,None])*np.cos(lat1) * np.sin(diff_lng/2)**2
29 return 2 * 6371 * np.arcsin(np.sqrt(d))
ValueError: operands could not be broadcast together with shapes (1000,1000) (1000,100)