So, after some work and an idea from the scipy mailing list, I think that in my case (with a constant A and slowly varying k), the best way to do this is to use the following implementation.
class SearchSorted:
def __init__(self, tensor, use_k_optimization=True):
'''
use_k_optimization requires storing 4x the size of the tensor.
If use_k_optimization is True, the class will assume that successive calls will be made with similar k.
When this happens, we can cut the running time significantly by storing additional variables. If it won't be
called with successive k, set the flag to False, as otherwise would just consume more memory for no
good reason
'''
self.indices_sort = np.argsort(tensor)
self.sorted_tensor = tensor[self.indices_sort]
self.inv_indices_sort = np.argsort(self.indices_sort)
self.use_k_optimization = use_k_optimization
self.previous_indices_results = None
self.prev_idx_A_k_pair = None
def query(self, k):
midpoints = k[:-1] + np.diff(k) / 2
idx_count = np.searchsorted(self.sorted_tensor, midpoints)
idx_A_k_pair = []
count = 0
old_obj = 0
for obj in idx_count:
if obj != old_obj:
idx_A_k_pair.append((obj, count))
old_obj = obj
count += 1
if not self.use_k_optimization or self.previous_indices_results is None:
#creates the index matrix in the sorted case
final_indices = self._create_indices_matrix(idx_A_k_pair, self.sorted_tensor.shape, len(k))
#and now unsort it to match the original tensor position
indicesClosest = final_indices[self.inv_indices_sort]
if self.use_k_optimization:
self.prev_idx_A_k_pair = idx_A_k_pair
self.previous_indices_results = indicesClosest
return indicesClosest
old_indices_unsorted = self._create_indices_matrix(self.prev_idx_A_k_pair, self.sorted_tensor.shape, len(k))
new_indices_unsorted = self._create_indices_matrix(idx_A_k_pair, self.sorted_tensor.shape, len(k))
mask = new_indices_unsorted != old_indices_unsorted
self.prev_idx_A_k_pair = idx_A_k_pair
self.previous_indices_results[self.indices_sort[mask]] = new_indices_unsorted[mask]
indicesClosest = self.previous_indices_results
return indicesClosest
@staticmethod
def _create_indices_matrix(idx_A_k_pair, matrix_shape, len_quant_points):
old_idx = 0
final_indices = np.zeros(matrix_shape, dtype=int)
for idx_A, idx_k in idx_A_k_pair:
final_indices[old_idx:idx_A] = idx_k
old_idx = idx_A
final_indices[old_idx:] = len_quant_points - 1
return final_indices
The idea is to sort the array A beforehand, then use searchsorted of A on the midpoints of k. This gives the same information as before, in that it tells us exactly which points of A are closer to which points of k. The method _create_indices_matrix will create the full indices array from these informations, and then we will unsort it to recover the original order of A. To take advantage of slowly varying k, we save the last indices and we determine which indices we have to change; we then change only those. For slowly varying k, this produces superior performance (at a quite bigger memory cost, however).
For random matrix A of 5 million elements and k of about 30 elements, and repeating the experiments 60 times, we get
Function search_sorted1; 15.72285795211792s
Function search_sorted2; 13.030786037445068s
Function query; 2.3306031227111816s <- the one with use_k_optimization = True
Function query; 4.81286096572876s <- with use_k_optimization = False
scipy.spatial.KDTree.query is too slow, and I don't time it (above 1 minute, though). This is the code used to do the timing; contains also the implementation of search_sorted1 and 2.
import numpy as np
import scipy
import scipy.spatial
import time
A = np.random.rand(10000*500) #5 million elements
k = np.random.rand(32)
k.sort()
#first attempt, detailed in the answer, too
def search_sorted1(A, k):
indicesClosest = np.searchsorted(k, A)
flagToReduce = indicesClosest == k.shape[0]
modifiedIndicesToAvoidOutOfBoundsException = indicesClosest.copy()
modifiedIndicesToAvoidOutOfBoundsException[flagToReduce] -= 1
flagToReduce = np.logical_or(flagToReduce,
np.abs(A-k[indicesClosest-1]) <
np.abs(A - k[modifiedIndicesToAvoidOutOfBoundsException]))
flagToReduce = np.logical_and(indicesClosest > 0, flagToReduce)
indicesClosest[flagToReduce] -= 1
return indicesClosest
#taken from @Divakar answer linked in the comments under the question
def search_sorted2(A, k):
indicesClosest = np.searchsorted(k, A, side="left").clip(max=k.size - 1)
mask = (indicesClosest > 0) & \
((indicesClosest == len(k)) | (np.fabs(A - k[indicesClosest - 1]) < np.fabs(A - k[indicesClosest])))
indicesClosest = indicesClosest - mask
return indicesClosest
def kdquery1(A, k):
d = scipy.spatial.cKDTree(k, compact_nodes=False, balanced_tree=False)
_, indices = d.query(A)
return indices
#After an indea on scipy mailing list
class SearchSorted:
def __init__(self, tensor, use_k_optimization=True):
'''
Using this requires storing 4x the size of the tensor.
If use_k_optimization is True, the class will assume that successive calls will be made with similar k.
When this happens, we can cut the running time significantly by storing additional variables. If it won't be
called with successive k, set the flag to False, as otherwise would just consume more memory for no
good reason
'''
self.indices_sort = np.argsort(tensor)
self.sorted_tensor = tensor[self.indices_sort]
self.inv_indices_sort = np.argsort(self.indices_sort)
self.use_k_optimization = use_k_optimization
self.previous_indices_results = None
self.prev_idx_A_k_pair = None
def query(self, k):
midpoints = k[:-1] + np.diff(k) / 2
idx_count = np.searchsorted(self.sorted_tensor, midpoints)
idx_A_k_pair = []
count = 0
old_obj = 0
for obj in idx_count:
if obj != old_obj:
idx_A_k_pair.append((obj, count))
old_obj = obj
count += 1
if not self.use_k_optimization or self.previous_indices_results is None:
#creates the index matrix in the sorted case
final_indices = self._create_indices_matrix(idx_A_k_pair, self.sorted_tensor.shape, len(k))
#and now unsort it to match the original tensor position
indicesClosest = final_indices[self.inv_indices_sort]
if self.use_k_optimization:
self.prev_idx_A_k_pair = idx_A_k_pair
self.previous_indices_results = indicesClosest
return indicesClosest
old_indices_unsorted = self._create_indices_matrix(self.prev_idx_A_k_pair, self.sorted_tensor.shape, len(k))
new_indices_unsorted = self._create_indices_matrix(idx_A_k_pair, self.sorted_tensor.shape, len(k))
mask = new_indices_unsorted != old_indices_unsorted
self.prev_idx_A_k_pair = idx_A_k_pair
self.previous_indices_results[self.indices_sort[mask]] = new_indices_unsorted[mask]
indicesClosest = self.previous_indices_results
return indicesClosest
@staticmethod
def _create_indices_matrix(idx_A_k_pair, matrix_shape, len_quant_points):
old_idx = 0
final_indices = np.zeros(matrix_shape, dtype=int)
for idx_A, idx_k in idx_A_k_pair:
final_indices[old_idx:idx_A] = idx_k
old_idx = idx_A
final_indices[old_idx:] = len_quant_points - 1
return final_indices
mySearchSorted = SearchSorted(A, use_k_optimization=True)
mySearchSorted2 = SearchSorted(A, use_k_optimization=False)
allFunctions = [search_sorted1, search_sorted2,
mySearchSorted.query,
mySearchSorted2.query]
print(np.array_equal(mySearchSorted.query(k), kdquery1(A, k)[1]))
print(np.array_equal(mySearchSorted.query(k), search_sorted2(A, k)[1]))
print(np.array_equal(mySearchSorted2.query(k), search_sorted2(A, k)[1]))
if __name__== '__main__':
num_to_average = 3
for func in allFunctions:
if func.__name__ == 'search_sorted3':
indices_sort = np.argsort(A)
sA = A[indices_sort].copy()
inv_indices_sort = np.argsort(indices_sort)
else:
sA = A.copy()
if func.__name__ != 'query':
func_to_use = lambda x: func(sA, x)
else:
func_to_use = func
k_to_use = k
start_time = time.time()
for idx_average in range(num_to_average):
for idx_repeat in range(10):
k_to_use += (2*np.random.rand(*k.shape)-1)/100 #uniform between (-1/100, 1/100)
k_to_use.sort()
indices = func_to_use(k_to_use)
if func.__name__ == 'search_sorted3':
indices = indices[inv_indices_sort]
val = k[indices]
end_time = time.time()
total_time = end_time-start_time
print('Function {}; {}s'.format(func.__name__, total_time))
I'm sure that it still possible to do better (I use a loot of space for SerchSorted class, so we could probably save something). If you have any ideas for an improvement, please let me know!