- I tried this with your sample data, but it does not contain enough orders for any of them to fall inside the convex hull of a category's points
- I have simulated some data to demonstrate the approach
- create geopandas data frame of orders
- create geopandas data frame of convex hull of points that make up categories
- `sjoin()` the two GeoDataFrames to find the association you require
- I have provided a visualisation to better demonstrate how this works
import geopandas as gpd
import pandas as pd
import numpy as np
# Three real cities stand in for the orders.
# NOTE(review): geopandas.datasets is deprecated in recent geopandas releases
# (and reportedly removed in 1.0) - confirm the installed version still ships it.
gdf = gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
gdf = gdf.loc[gdf["name"].isin(["London", "Paris", "Brussels"])]
# gdf = gdf.sample(10)

# pandas dataframes structured as per question
df1 = pd.DataFrame(
    {"Long": gdf["geometry"].x, "Lat": gdf["geometry"].y, "Order_ID": gdf["name"]}
)

# Number of simulated points per category.
N = 8

# Bounding box of a buffer of 3 * 10**5 CRS units around each city. The buffer
# is computed in the estimated UTM CRS (so the distance is not in degrees) and
# projected back to the source CRS before the bounds are taken.
city_bounds = (
    gdf.reset_index()
    .to_crs(gdf.estimate_utm_crs())
    .buffer(3 * 10**5)
    .to_crs(gdf.crs)
    .bounds
)

# One category per city (labelled "A", "B", "C", ...), each made of N points
# drawn uniformly from that city's bounding box.
df2 = pd.concat(
    [
        pd.DataFrame(
            {
                "Long": np.random.uniform(box.minx, box.maxx, N),
                "Lat": np.random.uniform(box.miny, box.maxy, N),
                "Category": np.full(N, chr(65 + position)),
            }
        )
        for position, box in city_bounds.iterrows()
    ]
)
# sample geometry, not enough orders to work effectively
# df1 = pd.DataFrame(
# **{
# "index": [0, 1, 2],
# "columns": ["Order_ID", "Lat", "Long"],
# "data": [[1, 32.0455, -76.9876], [2, 32.5679, -77.3421], [3, 33.4567, -77.987]],
# }
# )
# df2 = pd.DataFrame(
# **{
# "index": [0, 1, 2, 3, 4, 5],
# "columns": ["Category", "Lat", "Long"],
# "data": [
# ["S1", 32.0109, -76.0765],
# ["S1", 32.8769, -77.5674],
# ["S1", 33.1987, -78.7654],
# ["S2", 33.5967, -78.0765],
# ["S2", 33.8769, -79.5674],
# ["S2", 34.1987, -79.7654],
# ],
# }
# )
# Orders as point geometries.
# Long/Lat are longitude/latitude in degrees, so the layers are tagged as
# EPSG:4326 (WGS84) - assumes the input coordinates are WGS84; adjust the code
# if your own data uses a different CRS.
gdf1 = gpd.GeoDataFrame(
    df1["Order_ID"],
    geometry=gpd.points_from_xy(x=df1["Long"], y=df1["Lat"]),
    crs="epsg:4326",
)

# want convex hull of all points that make up a category:
# dissolve merges each category's points into one geometry, convex_hull turns
# that into a polygon, and reset_index brings "Category" back as a column.
gdf2 = (
    gpd.GeoDataFrame(
        df2["Category"],
        geometry=gpd.points_from_xy(x=df2["Long"], y=df2["Lat"]),
        crs="epsg:4326",
    )
    .dissolve("Category")
    .convex_hull.reset_index()
)

# get association between order and category using geometry
# (both frames must share the same CRS for the spatial join)
gpd.sjoin(gdf1, gdf2)
|     | Order_ID | geometry | index_right | Category |
|----:|:---------|:---------|------------:|:---------|
| 158 | Brussels | POINT (4.33137074969045 50.83526293533032) | 0 | A |
| 187 | London | POINT (-0.118667702475932 51.5019405883275) | 1 | B |
| 199 | Paris | POINT (2.33138946713035 48.86863878981461) | 2 | C |
visualise
# visualise it...
# Draw the category hulls as the base map at a fixed 500x300 size.
m = gdf2.explore(width=500, height=300)
# Overlay the orders in red on that same map; as the last expression this is
# what an interactive session displays.
gdf1.explore(color="red", m=m)
