Without knowing anything more about the range and precision of the values, or assuming too much about the distribution of lookups versus reference point list changes, some simple optimizations to a for
loop yields about a 30x speedup for 100 lookups over the faster OrderBy
/First
code:
Using pld
for your ProcessedLineData
and data
for Modifications.Value_and_Ref_Calc.ReferenceFile.Dataset[0].Data
you get:
var _closestSample = data[0];
var dist = (_closestSample.Easting - pld.Easting) * (_closestSample.Easting - pld.Easting) + (_closestSample.Northing - pld.Northing) * (_closestSample.Northing - pld.Northing);
for (int j2 = 1; j2 < data.Count; ++j2) {
var y = data[j2];
var ydist = (y.Easting - pld.Easting) * (y.Easting - pld.Easting) + (y.Northing - pld.Northing) * (y.Northing - pld.Northing);
if (ydist < dist) {
dist = ydist;
_closestSample = y;
}
}
With my timing over a 2,000,000 entry data
list and 100 lookups, the OrderBy
/First
takes 2.22 seconds and the for
takes 0.06 seconds for a 32x speedup.
So, I was sure there was a better way than brute force, and after some research, discovered Morton Codes and Hilbert Curves. A little work generated a SpatialIndex
class using the Hilbert Curve and a SpatialIndexMorton
class using the Morton Index. I also adjusted the Hilbert Index to only index bits 16-32, which provided the optimal lookups per second. For my data, the Morton Curve is somewhat faster now.
Using the same random data tests, I get that the for
method can do 147 lookups per second, the Hilbert Index 5634 lookups per second and the Morton Index 7370 lookups per second over 10,000 lookups with 2,000,000 reference points. Note that the spatial indexes have about 3 seconds of setup time, so for very few lookups it is faster to do brute force with for
- I get a break even time around 468 lookups.
In order to make this (somewhat) general purpose, I started with a (C# 8.0) interface for Earth coordinates that provides some helper methods:
public interface ICoordinate {
double Longitude { get; set; }
double Latitude { get; set; }
public ulong MortonCode() {
float f = (float)Latitude;
uint ui;
unsafe { // perform unsafe cast (preserving raw binary)
float* fRef = &f;
ui = *((uint*)fRef);
}
ulong ixl = ui;
f = (float)Longitude;
unsafe { // perform unsafe cast (preserving raw binary)
float* fRef = &f;
ui = *((uint*)fRef);
}
ulong iyl = ui;
ixl = (ixl | (ixl << 16)) & 0x0000ffff0000ffffL;
iyl = (iyl | (iyl << 16)) & 0x0000ffff0000ffffL;
ixl = (ixl | (ixl << 8)) & 0x00ff00ff00ff00ffL;
iyl = (iyl | (iyl << 8)) & 0x00ff00ff00ff00ffL;
ixl = (ixl | (ixl << 4)) & 0x0f0f0f0f0f0f0f0fL;
iyl = (iyl | (iyl << 4)) & 0x0f0f0f0f0f0f0f0fL;
ixl = (ixl | (ixl << 2)) & 0x3333333333333333L;
iyl = (iyl | (iyl << 2)) & 0x3333333333333333L;
ixl = (ixl | (ixl << 1)) & 0x5555555555555555L;
iyl = (iyl | (iyl << 1)) & 0x5555555555555555L;
return ixl | (iyl << 1);
}
const int StartBitMinus1 = 31;
const int EndBit = 16;
//convert (x,y) to 31-bit Hilbert Index
public ulong HilbertIndex() {
float f = (float)Latitude;
uint x;
unsafe { // perform unsafe cast (preserving raw binary)
float* fRef = &f;
x = *((uint*)fRef);
}
f = (float)Longitude;
uint y;
unsafe { // perform unsafe cast (preserving raw binary)
float* fRef = &f;
y = *((uint*)fRef);
}
ulong hi = 0;
for (int bitpos = StartBitMinus1; bitpos >= EndBit; --bitpos) {
// extract s'th bit from x & y
var rx = (x >> bitpos) & 1;
var ry = (y >> bitpos) & 1;
hi <<= 2;
hi += (rx << 1) + (rx ^ ry);
//rotate/flip a quadrant appropriately
if (ry == 0) {
if (rx == 1) {
x = ~x;
y = ~y;
}
//Swap x and y
uint t = x;
x = y;
y = t;
}
}
return hi;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public double DistanceTo(ICoordinate b) =>
Math.Sqrt((Longitude - b.Longitude) * (Longitude - b.Longitude) + (Latitude - b.Latitude) * (Latitude - b.Latitude));
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public double Distance2To(ICoordinate b) => (Longitude - b.Longitude) * (Longitude - b.Longitude) + (Latitude - b.Latitude) * (Latitude - b.Latitude);
public ICoordinate MakeNew(double plat, double plong);
}
public static class ICoordinateExt {
public static ICoordinate Minus(this ICoordinate a, ICoordinate b) =>
a.MakeNew(a.Latitude - b.Latitude, a.Longitude - b.Longitude);
public static ICoordinate Plus(this ICoordinate a, ICoordinate b) =>
a.MakeNew(a.Latitude + b.Latitude, a.Longitude + b.Longitude);
}
Then, a real class to implement the interface (which would be replaced with your real class):
public class PointOfInterest : ICoordinate {
public double Longitude { get; set; }
public double Latitude { get; set; }
public PointOfInterest(double plat, double plong) {
Latitude = plat;
Longitude = plong;
}
public ICoordinate MakeNew(double plat, double plong) => new PointOfInterest(plat, plong);
}
And a class to convert a IEnumerable<ICoordinate>
into a spatially indexed collection of ICoordinate
using the Hilbert Curve:
public class SpatialIndex {
SortedList<ulong, List<ICoordinate>> orderedData;
List<ulong> orderedIndexes;
public SpatialIndex(IEnumerable<ICoordinate> data) {
orderedData = data.GroupBy(d => d.HilbertIndex()).ToSortedList(g => g.Key, g => g.ToList());
orderedIndexes = orderedData.Keys.ToList();
}
public ICoordinate FindNearest(ICoordinate aPoint) {
var hi = aPoint.HilbertIndex();
var nearestIndex = orderedIndexes.FindNearestIndex(hi);
var nearestGuess = orderedData.Values[nearestIndex][0];
var guessDist = (nearestGuess.Longitude - aPoint.Longitude) * (nearestGuess.Longitude - aPoint.Longitude) + (nearestGuess.Latitude - aPoint.Latitude) * (nearestGuess.Latitude - aPoint.Latitude);
if (nearestIndex > 0) {
var tryGuess = orderedData.Values[nearestIndex-1][0];
var tryDist = (tryGuess.Longitude - aPoint.Longitude) * (tryGuess.Longitude - aPoint.Longitude) + (tryGuess.Latitude - aPoint.Latitude) * (tryGuess.Latitude - aPoint.Latitude);
if (tryDist < guessDist) {
nearestGuess = tryGuess;
guessDist = tryDist;
}
}
var offsetPOI = new PointOfInterest(guessDist, guessDist);
var minhi = (aPoint.Minus(offsetPOI)).HilbertIndex();
var minhii = orderedIndexes.FindNearestIndex(minhi);
if (minhii > 0)
--minhii;
var maxhi = (aPoint.Plus(offsetPOI)).HilbertIndex();
var maxhii = orderedIndexes.FindNearestIndex(maxhi);
for (int j2 = minhii; j2 < maxhii; ++j2) {
var tryList = orderedData.Values[j2];
for (int j3 = 0; j3 < tryList.Count; ++j3) {
var y = tryList[j3];
var ydist = (y.Longitude - aPoint.Longitude) * (y.Longitude - aPoint.Longitude) + (y.Latitude - aPoint.Latitude) * (y.Latitude - aPoint.Latitude);
if (ydist < guessDist) {
nearestGuess = y;
guessDist = ydist;
}
}
}
return nearestGuess;
}
}
And a similar class using the Morton Curve:
public class SpatialIndexMorton {
SortedList<ulong, List<ICoordinate>> orderedData;
List<ulong> orderedIndexes;
public SpatialIndexMorton(IEnumerable<ICoordinate> data) {
orderedData = data.GroupBy(d => d.MortonCode()).ToSortedList(g => g.Key, g => g.ToList());
orderedIndexes = orderedData.Keys.ToList();
}
public ICoordinate FindNearest(ICoordinate aPoint) {
var mc = aPoint.MortonCode();
var nearestIndex = orderedIndexes.FindNearestIndex(mc);
var nearestGuess = orderedData.Values[nearestIndex][0];
var guessDist = (nearestGuess.Longitude - aPoint.Longitude) * (nearestGuess.Longitude - aPoint.Longitude) + (nearestGuess.Latitude - aPoint.Latitude) * (nearestGuess.Latitude - aPoint.Latitude);
if (nearestIndex > 0) {
var tryGuess = orderedData.Values[nearestIndex-1][0];
var tryDist = (tryGuess.Longitude - aPoint.Longitude) * (tryGuess.Longitude - aPoint.Longitude) + (tryGuess.Latitude - aPoint.Latitude) * (tryGuess.Latitude - aPoint.Latitude);
if (tryDist < guessDist) {
nearestGuess = tryGuess;
guessDist = tryDist;
}
}
var offsetPOI = new PointOfInterest(guessDist, guessDist);
var minmc = (aPoint.Minus(offsetPOI)).MortonCode();
var minmci = orderedIndexes.FindNearestIndex(minmc);
if (minmci > 0)
--minmci;
var maxmc = (aPoint.Plus(offsetPOI)).MortonCode();
var maxmci = orderedIndexes.FindNearestIndex(maxmc);
for (int j2 = minmci; j2 < maxmci; ++j2) {
var tryList = orderedData.Values[j2];
for (int j3 = 0; j3 < tryList.Count; ++j3) {
var y = tryList[j3];
var ydist = (y.Longitude - aPoint.Longitude) * (y.Longitude - aPoint.Longitude) + (y.Latitude - aPoint.Latitude) * (y.Latitude - aPoint.Latitude);
if (ydist < guessDist) {
nearestGuess = y;
guessDist = ydist;
}
}
}
return nearestGuess;
}
}
And some helper extension methods:
public static class ListExt {
public static int FindNearestIndex<T>(this List<T> l, T possibleKey) {
var keyIndex = l.BinarySearch(possibleKey);
if (keyIndex < 0) {
keyIndex = ~keyIndex;
if (keyIndex == l.Count)
keyIndex = l.Count - 1;
}
return keyIndex;
}
}
public static class IEnumerableExt {
public static SortedList<TKey, TValue> ToSortedList<T, TKey, TValue>(this IEnumerable<T> src, Func<T, TKey> keySelector, Func<T, TValue> valueSelector) =>
new SortedList<TKey, TValue>(src.ToDictionary(keySelector, valueSelector));
}
Finally, some sample code using this, with your references in data
, and your lookup values in plds
:
var hilbertIndex = new SpatialIndex(data);
var ans = new (ICoordinate, ICoordinate)[lookups];
for (int j1 = 0; j1 < lookups; ++j1) {
ICoordinate pld = plds[j1];
ans[j1] = (pld, hilbertIndex.FindNearest(pld));
}
UPDATE: I modified the find nearest algorithm to take the closest of the point above and below the target point on the index instead of just trying the one above. This provides another decent speedup.