Parallel Slopes Ordinary Least Squares (OLS) Model:
y = mx + grp + b where m=slope, b=y-intercept, & grp=categorical variable.
This is an alternative algorithm that can handle a rotated grid.
The OLS model includes both the data points in the original orientation
and a 90° rotation of the same data points. This is necessary so all gridlines are parallel and have the same slope.
Algorithm:
- Find a reference gridline to compare with remaining points by choosing two neighboring points in the first or last row with a slope closest to zero.
- Calculate the distances between this reference line and the remaining points.
- Segment points into groups w.r.t. the calculated distances (one group per gridline).
- Repeat steps 1 to 3 for the 90 degree rotated grid and combine results.
- Create a parallel slopes OLS model to determine linear equations for the gridlines.
- Rotate the rotated gridlines back to their original orientation.
- Calculate the intersection points.
Note: Fails if noise, angle and/or missing data are too much.
Example:

Python Code to Create Example
def create_random_example():
# Requires import of numpy and random packages
# Creates grid with random noise and missing points
# Example will fail if std_dev, rotation, pct_removed too large
# Parameters
first_row, last_row = 100, 900
first_col, last_col = 100, 600
num_rows = 6
num_cols = 4
rotation = 3 # degrees that grid is rotated
sd = 3 # percent std dev of avg x and avg y coordinates
pct_remove = 30 # percent of points to randomly remove from data
# Create grid
x = np.linspace(first_col, last_col, num_cols)
y = np.linspace(first_row, last_row, num_rows)
xx, yy = np.meshgrid(x, y)
# Add noise
x = xx.flatten() + sd * np.mean(xx) * np.random.randn(xx.size) / 100
y = yy.flatten() + sd * np.mean(yy) * np.random.randn(yy.size) / 100
# Randomly remove points
random_list = random.sample(range(0, num_cols*num_rows),
int(pct_remove*num_cols*num_rows/100))
x, y = np.delete(x, random_list), np.delete(y, random_list)
pts = np.column_stack((x, y))
# Rotate points
radians = np.radians(rotation)
rot_mat = np.array([[np.cos(radians),-np.sin(radians)],
[np.sin(radians), np.cos(radians)]])
einsum = np.einsum('ji, mni -> jmn', rot_mat, [pts])
pts = np.squeeze(einsum).T
return np.rint(pts)
Python Code to Fit Gridlines
import numpy as np
import pandas as pd
import itertools
import math
import random
from statsmodels.formula.api import ols
from scipy.spatial import KDTree
import matplotlib.pyplot as plt
def pt_line_dist(pt, ref_line):
pt1, pt2 = [ref_line[:2], ref_line[2:]]
# Distance from point to line defined by two other points
return np.linalg.norm(np.cross(pt1 - pt2, [pt[0],pt[1]])) \
/ np.linalg.norm(pt1 - pt2)
def segment_pts(amts, grp_var, grp_label):
# Segment on amounts (distances here) in last column of array
# Note: need to label groups with string for OLS model
amts = amts[amts[:, -1].argsort()]
first_amt_in_grp = amts[0][-1]
group, groups, grp = [], [], 0
for amt in amts:
if amt[-1] - first_amt_in_grp > grp_var:
groups.append(group)
first_amt_in_grp = amt[-1]
group = []; grp += 1
group.append(np.append(amt[:-1],[[grp_label + str(grp)]]))
groups.append(group)
return groups
def find_reference_line(pts):
# Find point with minimum absolute slope relative both min y and max y
y = np.hsplit(pts, 2)[1] # y column of array
m = []
for i, y_pt in enumerate([ pts[np.argmin(y)], pts[np.argmax(y)] ]):
m.append(np.zeros((pts.shape[0]-1, 5))) # dtype default is float64
m[i][:,2:4] = np.delete(pts, np.where((pts==y_pt).all(axis=1))[0], axis=0)
m[i][:,4] = abs( (m[i][:,3]-y_pt[1]) / (m[i][:,2]-y_pt[0]) )
m[i][:,:2] = y_pt
m = np.vstack((m[0], m[1]))
return m[np.argmin(m[:,4]), :4]
# Ignore division by zero (slopes of vertical lines)
np.seterr(divide='ignore')
# Create dataset and plot
pts = create_random_example()
plt.scatter(pts[:,0], pts[:,1], c='r') # plot now because pts array changes
# Average distance to the nearest neighbor of each point
tree = KDTree(pts)
nn_avg_dist = np.mean(tree.query(pts, 2)[0][:, 1])
# Find groups of points representing each gridline
groups = []
for orientation in ['o', 'r']: # original and rotated orientations
# Rotate points 90 degrees (note: this moves pts to 2nd quadrant)
if orientation == 'r':
pts[:,1] = -1 * pts[:,1]
pts[:, [1, 0]] = pts[:, [0, 1]]
# Find reference line to compare remaining points for grouping
ref_line = find_reference_line(pts) # line is defined by two points
# Distances between points and reference line
pt_dists = np.zeros((pts.shape[0], 3))
pt_dists[:,:2] = pts
pt_dists[:,2] = np.apply_along_axis(pt_line_dist, 1, pts, ref_line).T
# Segment pts into groups w.r.t. distances (one group per gridline)
# Groups have range less than nn_avg_dist.
groups += segment_pts(pt_dists, 0.7*nn_avg_dist, orientation)
# Create dataframe of groups (OLS model requires a dataframe)
df = pd.DataFrame(np.row_stack(groups), columns=['x', 'y', 'grp'])
df['x'] = pd.to_numeric(df['x'])
df['y'] = pd.to_numeric(df['y'])
# Parallel slopes OLS model
ols_model = ols("y ~ x + grp + 0", data=df).fit()
# OLS parameters
grid_lines = ols_model.params[:-1].to_frame() # panda series to dataframe
grid_lines = grid_lines.rename(columns = {0:'b'})
grid_lines['grp'] = grid_lines.index.str[4:6]
grid_lines['m'] = ols_model.params[-1] # slope
# Rotate the rotated lines back to their original orientation
grid_lines.loc[grid_lines['grp'].str[0] == 'r', 'b'] = grid_lines['b'] / grid_lines['m']
grid_lines.loc[grid_lines['grp'].str[0] == 'r', 'm'] = -1 / grid_lines['m']
# Find grid intersection points by combinations of gridlines
comb = list(itertools.combinations(grid_lines['grp'], 2))
comb = [i for i in comb if i[0][0] != 'r']
comb = [i for i in comb if i[1][0] != 'o']
df_comb = pd.DataFrame(comb, columns=['grp', 'r_grp'])
# Merge gridline parameters with grid points
grid_pts = df_comb.merge(grid_lines.drop_duplicates('grp'),how='left',on='grp')
grid_lines.rename(columns={'grp': 'r_grp'}, inplace=True)
grid_pts.rename(columns={'b':'o_b', 'm': 'o_m', 'grp':'o_grp'}, inplace=True)
grid_pts = grid_pts.merge(grid_lines.drop_duplicates('r_grp'),how='left',on='r_grp')
grid_pts.rename(columns={'b':'r_b', 'm': 'r_m'}, inplace=True)
# Calculate x, y coordinates of gridline interception points
grid_pts['x'] = (grid_pts['r_b']-grid_pts['o_b']) \
/ (grid_pts['o_m']-grid_pts['r_m'])
grid_pts['y'] = grid_pts['o_m'] * grid_pts['x'] + grid_pts['o_b']
# Results output
print(grid_lines)
print(grid_pts)
plt.scatter(grid_pts['x'], grid_pts['y'], s=8, c='b') # for setting axes
axes = plt.gca()
axes.invert_yaxis()
axes.xaxis.tick_top()
axes.set_aspect('equal')
axes.set_xlim(axes.get_xlim())
axes.set_ylim(axes.get_ylim())
x_vals = np.array(axes.get_xlim())
for idx in grid_lines.index:
y_vals = grid_lines['b'][idx] + grid_lines['m'][idx] * x_vals
plt.plot(x_vals, y_vals, c='gray')
plt.show()