I have a very big dataframe with GPS values. I would need to compare one row with the other rows one at a time. Right now I am doing it within a loop and therefore is pretty inefficient. Is there a to perform this operation without a loop?
import numpy as np
def haversine_np(lon1, lat1, lon2, lat2):
lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
c = 2 * np.arcsin(np.sqrt(a))
km = 6367 * c
return km
import pandas as pd
d1 = {'id': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'longitude': [4.929783, 4.932333, 4.933950, 4.933900, 4.928467, 4.924583, 4.922133, 4.921400, 4.920967], 'latitude': [52.372250, 52.370884, 52.371101, 52.372234, 52.375282, 52.375950, 52.376301, 52.376232, 52.374481]}
df = pd.DataFrame(data=d1)
for i in range(df.shape[0]):
#take one row and perform calculation with all other rows
haversine_np(df.iloc[i]['latitude'],df.iloc[i]['longitude'],df['latitude'],df['longitude'])