Note that:
- The list which we do not want to produce, if we did produce it, could be sorted
- It's possible to produce the combination that would reside at a given index in that sorted list without actually producing the list (see
iterCombination
, below)
- It's possible to sample a
range
without producing the entire range as a collection
As such, without producing any length 36C10 list, we can sample 1200 distinct indices from the 36C10 possible indices and then produce the combinations that would reside at those indices if we had produced and sorted the list of all combinations.
With the below code, this is a generator that will yield 1200 distinct samples:
combinations.iterRandomSampleOfCombinations(list(range(36)), 10, 1200)
Combination at an index in a lexicographically sorted list of all combinations of nCk
(From my answer to Nth combination):
def iterCombination(index, n, k):
'''Yields the items of the single combination that would be at the provided
(0-based) index in a lexicographically sorted list of combinations of choices
of k items from n items [0,n), given the combinations were sorted in
descending order. Yields in descending order.
'''
nCk = 1
for nMinusI, iPlus1 in zip(range(n, n - k, -1), range(1, k + 1)):
nCk *= nMinusI
nCk //= iPlus1
curIndex = nCk
for k in range(k, 0, -1):
nCk *= k
nCk //= n
while curIndex - nCk > index:
curIndex -= nCk
nCk *= (n - k)
nCk -= nCk % k
n -= 1
nCk //= n
n -= 1
yield n
A random sample of combinations without creating a list of combinations:
import random
def choose(n, k):
'''Returns the number of ways to choose k items from n items'''
reflect = n - k
if k > reflect:
if k > n:
return 0
k = reflect
if k == 0:
return 1
for nMinusIPlus1, i in zip(range(n - 1, n - k, -1), range(2, k + 1)):
n = n * nMinusIPlus1 // i
return n
def iterRandomSampleOfCombinations(items, combinationSize, sampleSize):
'''Yields a random sample of sampleSize combinations, each composed of
combinationSize elements chosen from items.
The sample is as per random.sample, thus any sub-slice will also be a valid
random sample.
Each combination will be a reverse ordered list of items - one could reverse
them or shuffle them post yield if need be.
'''
n = len(items)
if n < 1 or combinationSize < 1 or combinationSize > n:
return
nCk = choose(n, combinationSize)
if sampleSize > nCk:
return
for sample in random.sample(range(nCk), sampleSize):
yield [items[i] for i in iterCombination(sample, n, combinationSize)]
Example, a sample of 29 combinations of length 10 chosen from 36 items [A-Z] + [a-j]:
>>> items = [chr(i) for i in range(65, 91)] + [chr(i) for i in range(97, 107)]
>>> len(items)
36
>>> for combination in combinations.iterRandomSampleOfCombinations(items, 10, 29):
... sampledCombination
...
['i', 'e', 'b', 'Z', 'U', 'Q', 'N', 'M', 'H', 'A']
['j', 'i', 'h', 'g', 'f', 'Z', 'P', 'I', 'G', 'E']
['e', 'a', 'Z', 'U', 'Q', 'L', 'G', 'F', 'C', 'B']
['i', 'h', 'f', 'Y', 'X', 'W', 'V', 'P', 'I', 'H']
['g', 'Y', 'V', 'S', 'R', 'N', 'M', 'L', 'K', 'I']
['j', 'i', 'f', 'e', 'd', 'b', 'Z', 'X', 'W', 'L']
['g', 'f', 'e', 'Z', 'T', 'S', 'P', 'L', 'J', 'E']
['d', 'c', 'Z', 'X', 'V', 'U', 'S', 'I', 'H', 'C']
['f', 'e', 'Y', 'U', 'I', 'H', 'D', 'C', 'B', 'A']
['j', 'd', 'b', 'W', 'Q', 'P', 'N', 'M', 'F', 'B']
['j', 'a', 'V', 'S', 'P', 'N', 'L', 'J', 'H', 'G']
['g', 'f', 'e', 'a', 'W', 'V', 'O', 'N', 'J', 'D']
['a', 'Z', 'Y', 'W', 'Q', 'O', 'N', 'F', 'B', 'A']
['i', 'g', 'a', 'X', 'V', 'S', 'Q', 'P', 'H', 'D']
['c', 'b', 'a', 'T', 'P', 'O', 'M', 'I', 'D', 'B']
['i', 'f', 'b', 'Y', 'X', 'W', 'V', 'U', 'M', 'A']
['j', 'd', 'U', 'T', 'S', 'K', 'G', 'F', 'C', 'B']
['c', 'Z', 'X', 'U', 'T', 'S', 'O', 'M', 'F', 'D']
['g', 'f', 'X', 'S', 'P', 'M', 'F', 'D', 'C', 'B']
['f', 'Y', 'W', 'T', 'P', 'M', 'J', 'H', 'D', 'C']
['h', 'b', 'Y', 'X', 'W', 'Q', 'K', 'F', 'C', 'B']
['j', 'g', 'Z', 'Y', 'T', 'O', 'L', 'G', 'E', 'D']
['h', 'Z', 'Y', 'S', 'R', 'Q', 'H', 'G', 'F', 'E']
['i', 'c', 'X', 'V', 'R', 'P', 'N', 'L', 'J', 'E']
['f', 'b', 'Z', 'Y', 'W', 'V', 'Q', 'N', 'G', 'D']
['f', 'd', 'c', 'b', 'V', 'T', 'S', 'R', 'Q', 'B']
['i', 'd', 'W', 'U', 'S', 'O', 'N', 'M', 'K', 'G']
['g', 'f', 'a', 'W', 'V', 'T', 'S', 'R', 'H', 'B']
['g', 'f', 'a', 'W', 'T', 'S', 'O', 'L', 'K', 'G']
Note: The combinations themselves are (reverse) ordered, but the sample is random (as is any slice thereof since random.sample
was employed on a range
object), if a random order is also required simply perform random.shuffle(combination)
.
It's pretty fast too (although maybe a different ordering of combinations could be faster?):
>>> samples = 1000
>>> sampleSize = 1200
>>> combinationSize = 10
>>> len(items)
36
>>> while 1:
... start = time.clock()
... for i in range(samples):
... for combination in iterRandomSampleOfCombinations(items, combinationSize, sampleSize):
... pass
... end = time.clock()
... print("{0} seconds per length {1} sample of {2}C{3}".format((end - start)/samples, sampleSize, len(items), combinationSize))
... break
...
0.03162827446371375 seconds per length 1200 sample of 36C10