I have two dataframes
df1
Address lat lon
store_12 30.375745 -87.679788
store_132 33.382099 -111.964918
store_134 32.374632 -111.100671
store_31 34.215678 -119.065539
store_23 33.126252 -117.321188
df2
Address lat lon
geo123 59.5119 -139.6711
geo134 66.9161 -151.5089
geo154 65.3700 -146.5900
geo112 64.7408 -156.8756
geo342 62.9575 -155.6103
geo543 66.9500 -150.6700
I have written my code using haversine formula but it calculates distance between all possible pairs, whereas I need the pair with the minimum distance
import pandas as pd
from math import cos, asin, sqrt
d1 = {'Address':['store_12', 'store_132', 'store_134', 'store_31' ,'store_23'], 'lat':[30.3757446, 33.3820989, 32.3746316, 34.2156779,33.1262516], 'lon':[-87.6797877,-111.964918, -111.1006705, -119.0655388, -117.3211879]}
d2 = {'loc':['geo123', 'geo134', 'geo154', 'geo112' ,'geo342','geo543'], 'lat':[59.5119, 66.9161, 65.37, 64.7408,62.9575,66.95], 'lon':[-139.6711,-151.5089, -146.59,-156.8756, -155.6103, -150.67]}
df1 = pd.DataFrame(d1)
df2 = pd.DataFrame(d2)
new_df = pd.DataFrame(columns = ["Store","Location","Distance"])
def distance(lat1, lon1, lat2, lon2):
p = 0.017453292519943295
hav = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
return 12742 * asin(sqrt(hav))
for index, row in df1.iterrows():
id1 = row['Address']
lat1 = row['lat']
lon1 = row['lon']
for index, row in df2.iterrows():
id2 = row['loc']
lat2 = row['lat']
lon2 = row['lon']
dist = distance(lat1,lon1,lat2,lon2)
new_df = new_df.append({"Store":id1 , "Location":id2 , "Distance":dist},ignore_index = True)
print(new_df)
How do I get location in df2
closest(minimum distance pair) to the location in df1
?