0
from math import radians, cos, sin, asin, sqrt

import pandas as pd  # needed for the DataFrame below; the snippet used `pd` without importing it

# Sample vehicle positions: one row per vehicle, identified by a unique Id.
# Lat/Long are coordinates in decimal degrees (the unit haversine() expects).
df = pd.DataFrame({
    'Id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
    'Feature': ['Truck', 'Truck', 'Truck', 'Truck', 'Truck', 'Van', 'Van', 'Van', 'Van', 'Car', 'Car', 'Car'],
    'Lat': [39.57713, 39.57723, 39.57671, 39.57672, 39.57697, 39.57188, 39.57151, 39.57153, 39.57197, 39.57613, 39.57577, 39.57595],
    'Long': [46.87062, 46.87004, 46.87001, 46.87066, 46.87027, 46.87489, 46.87482, 46.8752, 46.87528, 46.8757, 46.87572, 46.87545],
})

def haversine(lon1: float, lat1: float, lon2: float, lat2: float) -> float:
    """
    Return the great-circle distance in meters between two points on the earth.

    All four arguments are decimal degrees and are given LONGITUDE FIRST:
    ``(lon1, lat1)`` is the first point and ``(lon2, lat2)`` the second.
    The earth is modelled as a sphere of radius 6,371,000 m.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally above 1 for
    # (near-)antipodal points, which would make asin() raise ValueError;
    # clamp it to the valid domain. Values <= 1 are left untouched.
    a = min(1.0, a)
    c = 2 * asin(sqrt(a))
    # Radius of earth in meters is 6371000
    return 6371000 * c

How can I find which Truck / Car ID pairs are within 420 meters of each other, which Truck / Van ID pairs are within 655 meters, and which Car / Van ID pairs are within 425 meters?

Ideal output would be:

Truck 3 is within distance of Car 11

Truck 3 is within distance of Van 5

Car 10 is within distance of Van 8

NoobPythoner
  • 87
  • 10
  • Within X meters of what? Of a given point? Of each other? – Cimbali Jul 30 '21 at 12:40
  • Does this answer your question? [Pandas: calculate haversine distance within each group of rows](https://stackoverflow.com/questions/43577086/pandas-calculate-haversine-distance-within-each-group-of-rows) – mozway Jul 30 '21 at 12:42
  • @Cimbali Trucks -> Cars (what trucks are within 420 Meters of cars) Trucks -> Vans (what trucks are within 655 Meters of vans) Cars -> Vans (what cars are within 425 Meters of Vans) – NoobPythoner Jul 30 '21 at 12:44
  • @mozway Negative.. That is calculating distance between same ID. – NoobPythoner Jul 30 '21 at 12:46
  • All your `Id`s are different here. Should the `Id` column be `[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,]`? If not please provide the expected output. – mozway Jul 30 '21 at 12:58
  • @mozway Yes, all should be different. Those are the unique identifiers that help determine which point is within x meters – NoobPythoner Jul 30 '21 at 13:04
  • Then you want to calculate the distance between the rows, not *same ID*. It really looks like a variation of my above link. – mozway Jul 30 '21 at 13:08

1 Answer

0

You can generate all pairs you want with pd.merge(how='cross'):

>>> groups = df.groupby('Feature')
>>> pd.merge(groups.get_group('Car'), groups.get_group('Truck'), how='cross', suffixes=('', '_cmp'))
    Id Feature       Lat      Long  Id_cmp Feature_cmp   Lat_cmp  Long_cmp
0    9     Car  39.57613  46.87570       0       Truck  39.57713  46.87062
1    9     Car  39.57613  46.87570       1       Truck  39.57723  46.87004
2    9     Car  39.57613  46.87570       2       Truck  39.57671  46.87001
3    9     Car  39.57613  46.87570       3       Truck  39.57672  46.87066
4    9     Car  39.57613  46.87570       4       Truck  39.57697  46.87027
5   10     Car  39.57577  46.87572       0       Truck  39.57713  46.87062
6   10     Car  39.57577  46.87572       1       Truck  39.57723  46.87004
7   10     Car  39.57577  46.87572       2       Truck  39.57671  46.87001
8   10     Car  39.57577  46.87572       3       Truck  39.57672  46.87066
9   10     Car  39.57577  46.87572       4       Truck  39.57697  46.87027
10  11     Car  39.57595  46.87545       0       Truck  39.57713  46.87062
11  11     Car  39.57595  46.87545       1       Truck  39.57723  46.87004
12  11     Car  39.57595  46.87545       2       Truck  39.57671  46.87001
13  11     Car  39.57595  46.87545       3       Truck  39.57672  46.87066
14  11     Car  39.57595  46.87545       4       Truck  39.57697  46.87027

This allows us to easily generate all the comparisons we want to make:

>>> distances = {('Car', 'Truck'): 420, ('Truck', 'Van'): 655, ('Car', 'Van'): 425}
>>> all_cmp = pd.concat([pd.merge(groups.get_group(dist_from), groups.get_group(dist_to), how='cross', suffixes=('', '_cmp')) for dist_from, dist_to in distances])
>>> all_cmp.head()
   Id Feature       Lat     Long  Id_cmp Feature_cmp   Lat_cmp  Long_cmp
0   9     Car  39.57613  46.8757       0       Truck  39.57713  46.87062
1   9     Car  39.57613  46.8757       1       Truck  39.57723  46.87004
2   9     Car  39.57613  46.8757       2       Truck  39.57671  46.87001
3   9     Car  39.57613  46.8757       3       Truck  39.57672  46.87066
4   9     Car  39.57613  46.8757       4       Truck  39.57697  46.87027
>>> all_cmp.tail()
    Id Feature       Lat      Long  Id_cmp Feature_cmp   Lat_cmp  Long_cmp
7   10     Car  39.57577  46.87572       8         Van  39.57197  46.87528
8   11     Car  39.57595  46.87545       5         Van  39.57188  46.87489
9   11     Car  39.57595  46.87545       6         Van  39.57151  46.87482
10  11     Car  39.57595  46.87545       7         Van  39.57153  46.87520
11  11     Car  39.57595  46.87545       8         Van  39.57197  46.87528

We can easily compute the distances and we also need to align the threshold distances:

>>> dist = all_cmp.agg(lambda s: haversine(s['Lat'], s['Long'], s['Lat_cmp'], s['Long_cmp']), axis='columns')
>>> thresh = all_cmp[['Feature', 'Feature_cmp']].agg(lambda s: distances[tuple(s)], axis='columns')

And from there just compare, keep the lines you want, possibly aggregate:

>>> all_cmp[dist < thresh]
    Id Feature       Lat      Long  Id_cmp Feature_cmp   Lat_cmp  Long_cmp
0    0   Truck  39.57713  46.87062       5         Van  39.57188  46.87489
1    0   Truck  39.57713  46.87062       6         Van  39.57151  46.87482
3    0   Truck  39.57713  46.87062       8         Van  39.57197  46.87528
12   3   Truck  39.57672  46.87066       5         Van  39.57188  46.87489
13   3   Truck  39.57672  46.87066       6         Van  39.57151  46.87482
14   3   Truck  39.57672  46.87066       7         Van  39.57153  46.87520
15   3   Truck  39.57672  46.87066       8         Van  39.57197  46.87528
16   4   Truck  39.57697  46.87027       5         Van  39.57188  46.87489
17   4   Truck  39.57697  46.87027       6         Van  39.57151  46.87482
0    9     Car  39.57613  46.87570       5         Van  39.57188  46.87489
1    9     Car  39.57613  46.87570       6         Van  39.57151  46.87482
2    9     Car  39.57613  46.87570       7         Van  39.57153  46.87520
3    9     Car  39.57613  46.87570       8         Van  39.57197  46.87528
4   10     Car  39.57577  46.87572       5         Van  39.57188  46.87489
5   10     Car  39.57577  46.87572       6         Van  39.57151  46.87482
6   10     Car  39.57577  46.87572       7         Van  39.57153  46.87520
7   10     Car  39.57577  46.87572       8         Van  39.57197  46.87528
8   11     Car  39.57595  46.87545       5         Van  39.57188  46.87489
9   11     Car  39.57595  46.87545       6         Van  39.57151  46.87482
10  11     Car  39.57595  46.87545       7         Van  39.57153  46.87520
11  11     Car  39.57595  46.87545       8         Van  39.57197  46.87528
>>> close = all_cmp[dist < thresh].groupby('Id')['Id_cmp'].agg(list)
>>> close
Id
0        [5, 6, 8]
3     [5, 6, 7, 8]
4           [5, 6]
9     [5, 6, 7, 8]
10    [5, 6, 7, 8]
11    [5, 6, 7, 8]
Name: Id_cmp, dtype: object
>>> df.merge(close.rename('within dist').reset_index())
   Id Feature       Lat      Long   within dist
0   0   Truck  39.57713  46.87062     [5, 6, 8]
1   3   Truck  39.57672  46.87066  [5, 6, 7, 8]
2   4   Truck  39.57697  46.87027        [5, 6]
3   9     Car  39.57613  46.87570  [5, 6, 7, 8]
4  10     Car  39.57577  46.87572  [5, 6, 7, 8]
5  11     Car  39.57595  46.87545  [5, 6, 7, 8]
Cimbali
  • 11,012
  • 1
  • 39
  • 68
  • From my manual calculations, these are the only features that fall within the specified distance: Truck 3 is within distance of Car 11 Truck 3 is within distance of Van 5 Car 10 is within distance of Van 8 – NoobPythoner Jul 30 '21 at 13:41
  • @NoobPythoner I reused your `haversine` function. Check if the distances and thresholds that this code compute are the same as your calculations. – Cimbali Jul 30 '21 at 16:40