Here's an example on how to use sqlite to solve your problem. The searches are indexed so they're about as fast as you can get them:
import os
import random
import sqlite3
import contextlib
from datetime import datetime
@contextlib.contextmanager
def get_connection():
connection = sqlite3.connect('database.db')
try:
yield connection
connection.commit()
finally:
connection.close()
@contextlib.contextmanager
def get_cursor():
with get_connection() as connection:
cursor = connection.cursor()
try:
yield cursor
finally:
cursor.close()
def initial_create():
# read your giant array from a file here
with get_cursor() as cursor:
cursor.executescript('''
BEGIN;
CREATE TABLE big_list (
a int,
b int,
value text
);
CREATE INDEX big_list__a_b_idx ON big_list (a, b);
COMMIT;
''')
for i in range(1000):
a = random.randint(0, 1000000)
inserts = []
for j in range(1000):
b = random.randint(0, 1000000)
inserts.append((
a,
b,
'some string (%d,%d)' % (a, b),
))
cursor.executemany('INSERT INTO big_list VALUES (?,?,?)', inserts)
def test():
with get_cursor() as cursor:
print 'Total rows: %d' % (
cursor.execute('SELECT COUNT(*) FROM big_list').fetchone())
print 'Exact searches:'
for i in range(10):
start = datetime.now()
result = cursor.execute('''
SELECT *
FROM big_list
WHERE a = %d
AND b = %d
''' % (i * 1000, i * 10000))
print 'Got results: %r in' % result.fetchall(),
print datetime.now() - start
print 'Ranged searches:'
for i in range(10):
start = datetime.now()
result = cursor.execute('''
SELECT *
FROM big_list
WHERE a BETWEEN %d AND %d
AND b BETWEEN %d AND %d
''' % (i * 1000, (i + 1) * 1000, i * 1000, (i + 1) * 1000))
print 'Got results: %r in' % result.fetchall(),
print datetime.now() - start
if __name__ == '__main__':
if not os.path.isfile('database.db'):
print 'Creating database, this should only be needed once'
initial_create()
test()
Example output:
Total rows: 1000000
Exact searches:
Got results: [] in 0:00:00.000113
Got results: [] in 0:00:00.000055
Got results: [] in 0:00:00.000044
Got results: [] in 0:00:00.000044
Got results: [] in 0:00:00.000043
Got results: [] in 0:00:00.000045
Got results: [] in 0:00:00.000043
Got results: [] in 0:00:00.000041
Got results: [] in 0:00:00.000044
Got results: [] in 0:00:00.000041
Ranged searches:
Got results: [(604, 31, u'some string (604,31)'), (604, 386, u'some string (604,386)')] in 0:00:00.000889
Got results: [(1142, 1856, u'some string (1142,1856)')] in 0:00:00.000538
Got results: [] in 0:00:00.000056
Got results: [(3802, 3983, u'some string (3802,3983)')] in 0:00:00.000482
Got results: [] in 0:00:00.000165
Got results: [] in 0:00:00.000047
Got results: [] in 0:00:00.000164
Got results: [(7446, 7938, u'some string (7446,7938)'), (7947, 7381, u'some string (7947,7381)')] in 0:00:00.000867
Got results: [(8003, 8174, u'some string (8003,8174)')] in 0:00:00.000501