We can remove the duplicate rows and return the result in sorted order quite quickly using `np.lexsort` and a boolean mask:
def lex(arr):
    """Return the unique rows of a 2-D array in lexicographic order.

    Rows are ordered by the first column, with ties broken by each
    following column in turn.

    Parameters
    ----------
    arr : array_like, shape (n_rows, n_cols)
        Input rows; converted with ``np.asarray``. Zero rows is allowed.

    Returns
    -------
    numpy.ndarray
        A new array holding each distinct row of ``arr`` once, sorted.
    """
    arr = np.asarray(arr)
    if arr.shape[0] == 0:
        # Nothing to sort or deduplicate; still hand back a fresh array.
        return arr.copy()

    # np.lexsort treats the LAST key as the primary one, so the columns are
    # passed in reverse to make column 0 the primary sort key. Sorting into
    # the final order up front avoids re-sorting after deduplication.
    ordered = arr[np.lexsort(arr.T[::-1])]

    # Once sorted, duplicate rows are adjacent: keep a row only when it
    # differs from the row before it. A direct comparison is used instead
    # of np.diff so that equal rows containing inf are recognised as equal
    # (inf - inf is nan, which counts as "different").
    keep = np.ones(ordered.shape[0], dtype=bool)
    keep[1:] = np.any(ordered[1:] != ordered[:-1], axis=1)
    return ordered[keep]
# Convert the input rows `L` (defined outside this snippet) to a NumPy
# array; note that this rebinds the name `L` to the array.
L = np.array(L)
# Deduplicate and sort the rows; the value of this expression is the
# result shown under "Output" below.
lex(L)
# Output:
[[300. 400. ]
[349.9 486.6]
[350. 313.3]
[450. 313.3]
[450. 486.6]
[500. 400. ]]
Performance
Functions
def chrisz(arr):
    """Return the unique rows of a 2-D array in lexicographic order.

    Rows are ordered by the first column, with ties broken by each
    following column in turn.

    Parameters
    ----------
    arr : array_like, shape (n_rows, n_cols)
        Input rows; converted with ``np.asarray``. Zero rows is allowed.

    Returns
    -------
    numpy.ndarray
        A new array holding each distinct row of ``arr`` once, sorted.
    """
    arr = np.asarray(arr)
    if arr.shape[0] == 0:
        # Nothing to sort or deduplicate; still hand back a fresh array.
        return arr.copy()

    # np.lexsort treats the LAST key as the primary one, so the columns are
    # passed in reverse to make column 0 the primary sort key. Sorting into
    # the final order up front avoids re-sorting after deduplication.
    ordered = arr[np.lexsort(arr.T[::-1])]

    # Once sorted, duplicate rows are adjacent: keep a row only when it
    # differs from the row before it. A direct comparison is used instead
    # of np.diff so that equal rows containing inf are recognised as equal
    # (inf - inf is nan, which counts as "different").
    keep = np.ones(ordered.shape[0], dtype=bool)
    keep[1:] = np.any(ordered[1:] != ordered[:-1], axis=1)
    return ordered[keep]
def pp(data):
    """Return the distinct items of ``data`` as a sorted list.

    Sorting places equal items next to each other, so ``groupby`` yields
    exactly one key per run of equal items.
    """
    ordered = sorted(data)
    return [key for key, _run in itertools.groupby(ordered)]
def gazer(data):
    """Return the unique rows of ``data`` using ``np.unique``."""
    # axis=0 makes np.unique compare whole rows instead of flattening
    # the input into individual elements.
    distinct_rows = np.unique(data, axis=0)
    return distinct_rows
def wim(L):
    """Return the distinct rows of ``L`` as a sorted list.

    Each row is keyed by its tuple form so that equal rows collapse to a
    single dict entry; when a row repeats, the last occurrence is the
    object that is kept.
    """
    row_by_key = {}
    for row in L:
        row_by_key[tuple(row)] = row
    return sorted(row_by_key.values())
def jpp(L):
    """Return the distinct rows of ``L`` as a sorted list.

    NOTE(review): ``unique`` is not defined in this snippet; it is
    presumably a deduplicating helper that accepts a ``key`` callable
    (e.g. ``toolz.unique``) -- confirm against the file's imports.
    """
    # Rows are compared through their tuple form via key=tuple.
    deduplicated = unique(L, key=tuple)
    return sorted(deduplicated)
Setup
# Timing table: one row per implementation, one column per input size N.
res = pd.DataFrame(
    index=['chrisz', 'pp', 'gazer', 'wim', 'jpp'],
    columns=[10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000],
    dtype=float
)

for f in res.index:
    # chrisz and gazer are timed on the ndarray; the others on the list.
    if f in {'chrisz', 'gazer'}:
        stmt = '{}(npL)'.format(f)
    else:
        stmt = '{}(L)'.format(f)
    # The timed statement looks up L and npL in __main__, so both must be
    # module-level names when timeit runs.
    setp = 'from __main__ import L, npL, {}'.format(f)

    for c in res.columns:
        # c random 2-column rows: an integer part from randint(1, 1000)
        # plus a fractional part chosen from a pool of 1000 random floats.
        npL = np.random.randint(1,1000,(c,2)) + np.random.choice(np.random.random(1000), (c, 2))
        L = npL.tolist()
        res.at[f, c] = timeit(stmt, setp, number=50)

# Divide every column by its fastest entry so the plot shows relative time.
ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N")
ax.set_ylabel("time (relative)")
plt.show()

Validation
# Validation data: 100000 random 2-column rows, each value an integer from
# randint(1, 1000) plus a fractional part chosen from a pool of 1000 floats.
npL = np.random.randint(1,1000,(100000,2)) + np.random.choice(np.random.random(1000), (100000, 2))
# Plain nested-list copy of the same rows for the list-based functions.
L = npL.tolist()
# Chained equality: evaluates to True only if all five implementations
# return the same list of rows.
chrisz(npL).tolist() == pp(L) == gazer(npL).tolist() == wim(L) == jpp(L)
True