This can be done fully in geopandas
- Use a UTM CRS so that distances are meaningful. Distances are then calculated in meters, hence the conversion factor of 1609.34 to get miles.
- So that you know which point has been used as the reference (the first point in each group), the index of that point in the original data frame is captured.
- This solution works whether 1, 2 or more points share the same id.
import pandas as pd
import geopandas as gpd
# Sample input: two observations for each of the ids 1, 2 and 3.
df = pd.DataFrame(
    data=[
        [1, 35.624404, 34.542616],
        [2, 35.637812, 34.52873],
        [3, 35.433423, 34.465716],
        [1, 35.439104, 34.468755],
        [2, 35.512096, 34.524426],
        [3, 35.512096, 34.524426],
    ],
    columns=["id", "Longitude", "Latitude"],
)
# Build point geometries from the coordinate columns. points_from_xy takes x
# first and y second; for geographic coordinates x is the longitude and y the
# latitude.
# NOTE: the sample output recorded for this snippet was generated with the two
# columns passed in the opposite order (its POINT x values are the Latitude
# column), so its coordinates and distances differ from what this corrected
# call produces.
gdf = gpd.GeoDataFrame(
    df["id"],
    geometry=gpd.points_from_xy(df["Longitude"], df["Latitude"]),
    crs="epsg:4326",  # same geographic CRS the result is converted back to
)
# Reproject to the estimated UTM zone so that distances come out in meters
# rather than degrees.
gdf = gdf.to_crs(gdf.estimate_utm_crs())
# For each id, calculate the distance in miles from every point to the first
# point of its group; for good measure, also capture the index label of the
# point used as the reference.
METERS_PER_MILE = 1609.34

# Index label of the first row in each id group, broadcast to every row.
# A vectorised transform is used instead of groupby().apply() because the way
# apply() treats group keys and grouping columns differs between pandas
# versions, which changes the index and columns of the result.
row_labels = pd.Series(gdf.index, index=gdf.index)
first_label = row_labels.groupby(gdf["id"]).transform("first")

# Reference geometry for each row, indexed like gdf so that distance() pairs
# the rows one-to-one (assumes the index labels are unique).
reference = gpd.GeoSeries(
    gdf.geometry.loc[first_label].values,
    index=gdf.index,
    crs=gdf.crs,
)

gdf = gdf.assign(
    # distance() returns values in CRS units; gdf is expected to be in a
    # projected CRS with meter units at this point.
    d=gdf.geometry.distance(reference) / METERS_PER_MILE,
    i=first_label,
).to_crs("epsg:4326")  # back to geographic coordinates for display
gdf
Output:

|   | id | geometry | d | i |
|---|----|----------|---|---|
| 0 | 1 | POINT (34.542616 35.624404000000006) | 0 | 0 |
| 1 | 2 | POINT (34.52873 35.637812) | 0 | 1 |
| 2 | 3 | POINT (34.465716 35.433423) | 0 | 2 |
| 3 | 1 | POINT (34.468755 35.439104) | 13.4336 | 0 |
| 4 | 2 | POINT (34.524426 35.512096) | 8.66908 | 1 |
| 5 | 3 | POINT (34.524426 35.512096) | 6.35343 | 2 |