Given your data set, the most efficient data structure in terms of both set-up and look-up time complexity is a binary search tree, which gives you O(n log n) set-up and O(log n) look-up time complexity with O(n) space complexity.
The standard BST algorithm doesn't include your two special constraints (as I understand them)
- return the value for the maximum key <= the specified key
- bound the search space between the minimum and maximum values in the map
Here is a BST implementation based on this implementation:
class Node(object):
def __init__(self, key, value, parent):
self.left = None
self.right = None
self.value = value
self.key = key
self.parent = parent
def __str__(self):
return ":".join(map(str, (self.key, self.value)))
class BinarySearchTree(object):
def __init__(self):
self.root = None
def getRoot(self):
return self.root
def __setitem__(self, key, value):
if(self.root == None):
self.root = Node(key, value, None)
else:
self._set(key, value, self.root)
def _set(self, key, value, node):
if key == node.key:
node.value = value
elif key < node.key:
if(node.left != None):
self._set(key, value, node.left)
else:
node.left = Node(key, value, node)
else:
if(node.right != None):
self._set(key, value, node.right)
else:
node.right = Node(key, value, node)
def __contains__(self, key):
return self._get(key) != None
def __getitem__(self, key):
if(self.root != None):
return self._get(key, self.root)
else:
return None
def _get(self, key, node):
if key == node.key:
return node.value
elif key < node.key and node.left != None:
return self._get(key, node.left)
elif key > node.key and node.right != None:
return self._get(key, node.right)
Here is a subclass that fulfills requirement 1:
class FuzzySearchTree(BinarySearchTree):
def _get(self, key, node):
if key == node.key:
return node.value
elif key < node.key:
if node.left != None:
return self._get(key, node.left)
else:
return self._checkMin(key, node)
else:
if node.right != None:
return self._get(key, node.right)
else:
return node.value # found the closest match that is larger
def _checkMin(self, key, node):
return node.value
To fulfill requirement 2, you need to keep track of the minimum value in the tree. You should probably do this by keeping track of the minimum value at insert time, but here is a different approach. This approach is not super efficient, but it should still be o(3 log n) == O(log n), so it's not bad. If you don't really need this, I wouldn't bother with it.
class MinBoundedFuzzySearchTree(FuzzySearchTree):
def _checkMin(self, key, node):
# Unless the value is lower than the minimum value in the tree # Not advised
next = node.parent
while next.parent != None:
next = next.parent # Go up the tree to the top
next = next.left
while next.left != None:
next = next.left # Go down the tree to the left
if next.key > key:
return None # outside the the range of the tree
# Return the max value less than the key, which is by definition the parent
return node.parent.value
Here are some pseudo-tests:
tree = BinarySearchTree()
tree[123] = 'Foo'
tree[456] = 'Bar'
tree[789] = 'Hello'
tree[-111] = 'World'
print "BST(456) == 'Bar': " + str(tree[456])
print "BST(235) == None: " + str(tree[235])
print "BST(455) == None: " + str(tree[455])
print "BST(999) == None: " + str(tree[999])
print "BST(0) == None: " + str(tree[0])
print "BST(123) == 'Foo': " + str(tree[123])
print "BST(-110) == None: " + str(tree[-110])
print "BST(-999) == None: " + str(tree[-999])
tree = FuzzySearchTree()
tree[123] = 'Foo'
tree[456] = 'Bar'
tree[789] = 'Hello'
tree[-111] = 'World'
print
print "FST(456) == 'Bar': " + str(tree[456])
print "FST(235) == 'Foo': " + str(tree[235])
print "FST(455) == 'Foo': " + str(tree[455])
print "FST(999) == 'Hello': " + str(tree[999])
print "FST(0) == 'World': " + str(tree[0])
print "FST(123) == 'Foo': " + str(tree[123])
print "FST(-110) == 'World': " + str(tree[-110])
print "FST(-999) == 'World': " + str(tree[-999])
tree = MinBoundedFuzzySearchTree()
tree[123] = 'Foo'
tree[456] = 'Bar'
tree[789] = 'Hello'
tree[-111] = 'World'
print
print "MBFST(456) == 'Bar': " + str(tree[456])
print "MBFST(235) == 'Foo': " + str(tree[235])
print "MBFST(455) == 'Foo': " + str(tree[455])
print "MBFST(999) == 'Hello': " + str(tree[999])
print "MBFST(0) == 'World': " + str(tree[0])
print "MBFST(123) == 'Foo': " + str(tree[123])
print "MBFST(-110) == 'World': " + str(tree[-110])
print "MBFST(-999) == None: " + str(tree[-999])
And here is what this prints:
"""
BST(456) == 'Bar': Bar
BST(235) == None: None
BST(455) == None: None
BST(999) == None: None
BST(0) == None: None
BST(123) == 'Foo': Foo
BST(-110) == None: None
BST(-999) == None: None
FST(456) == 'Bar': Bar
FST(235) == 'Foo': Foo
FST(455) == 'Foo': Foo
FST(999) == 'Hello': Hello
FST(0) == 'World': World
FST(123) == 'Foo': Foo
FST(-110) == 'World': World
FST(-999) == 'World': Foo
MBFST(456) == 'Bar': Bar
MBFST(235) == 'Foo': Foo
MBFST(455) == 'Foo': Foo
MBFST(999) == 'Hello': Hello
MBFST(0) == 'World': World
MBFST(123) == 'Foo': Foo
MBFST(-110) == 'World': World
MBFST(-999) == None: None
"""