I can make a list of all combinations using list(itertools.combinations(range(n), m))
but this will typically be very large.
Given
n
andm
, how can I choose a combination uniformly at random without first constructing a massive list??
In the itertools
module there is a recipe for returning a random combination from an iterable. Below are two versions of the code, one for Python 2.x and one for Python 3.x - in both cases you are using a generator which means that you are not creating a large iterable in memory.
def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(xrange(n), r))
return tuple(pool[i] for i in indices)
In your case then it would be simple to do:
>>> import random
>>> def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(xrange(n), r))
return tuple(pool[i] for i in indices)
>>> n = 10
>>> m = 3
>>> print(random_combination(range(n), m))
(3, 5, 9) # Returns a random tuple with length 3 from the iterable range(10)
In the case of Python 3.x you replace the xrange
call with range
but the use-case is still the same.
def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(range(n), r))
return tuple(pool[i] for i in indices)
From http://docs.python.org/2/library/itertools.html#recipes
def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(xrange(n), r))
return tuple(pool[i] for i in indices)
A generator would be more memory efficient for iteration:
def random_combination(iterable,r):
i = 0
pool = tuple(iterable)
n = len(pool)
rng = range(n)
while i < r:
i += 1
yield [pool[j] for j in random.sample(rng, r)]
I've modified Jthorpe's generator in order to work when the number of combination you need greater than the length of iterator you pass:
def random_combination(iterable,r):
i = 0
pool = tuple(iterable)
n = len(pool)
rng = range(n)
while i < n**2/2:
i += 1
yield [pool[j] for j in random.sample(rng, r)]
And so, this code maybe help you:
Observation:: there isn't permutation repeated...
Take this output is very
lista = []
i = 0
r = 1000000
while i < r:
lista.append(np.random.permutation(20))
i += 1
print(lista)
# thanks to my friend WIniston for asking about it
Results:::
[array([11, 15, 12, 18, 5, 0, 9, 8, 14, 13, 19, 10, 7, 16, 3, 1, 17,4, 6, 2]),
array([ 5, 15, 12, 4, 17, 16, 14, 7, 19, 1, 2, 10, 3, 0, 18, 6, 9, 11, 13, 8]),
array([16, 5, 12, 19, 18, 17, 7, 1, 10, 4, 11, 3, 0, 14, 15, 9, 6, 2, 13, 8]), ...
The problem with the existing answers is they sample with replacement or wastefully discard duplicate samples. This is fine if you only want to draw one sample but not for drawing many samples (like I wanted to do!).
You can uniformly sample from the space of all combinations very efficiently by making use of Python's inbuilt methods.
from random import sample
def sample_from_all_combinations(all_items, num_samples = 10):
"""Randomly sample from the combination space"""
num_items = len(all_items)
num_combinations = 2 ** num_items
num_samples = min(num_combinations, num_samples)
samples = sample(range(1, num_combinations), k=num_samples)
for combination_num in samples:
items_subset = [
all_items[item_idx]
for item_idx, item_sampled in enumerate(
format(combination_num, f"0{num_items}b")
)
if item_sampled == "1"
]
yield items_subset
NOTE the above will fail with an OverflowError
when len(all_items) >= 64
because the Python ints >= 2 ** 64
are too large to convert to C ssize_t. You can solve this by modifying the sample
method from random
to the following: (Don't forget to update the import to from RandomLarge import sample
)
class RandomLarge(random.Random):
"""
Clone of random inbuilt methods but modified to work with large numbers
>= 2^64
"""
def sample(self, population, k):
"""
Chooses k unique random elements from a population sequence or set.
Modified random sample method to work with large ranges (>= 2^64)
"""
if isinstance(population, random._Set):
population = tuple(population)
if not isinstance(population, random._Sequence):
raise TypeError(
"Population must be a sequence or set. For dicts, use list(d)."
)
randbelow = self._randbelow
# NOTE this is the only line modified from the original function
# n = len(population)
n = population.stop - population.start
if not 0 <= k <= n:
raise ValueError("Sample larger than population or is negative")
result = [None] * k
setsize = 21 # size of a small set minus size of an empty list
if k > 5:
setsize += 4 ** random._ceil(
random._log(k * 3, 4)
) # table size for big sets
if n <= setsize:
# An n-length list is smaller than a k-length set
pool = list(population)
for i in range(k): # invariant: non-selected at [0,n-i)
j = randbelow(n - i)
result[i] = pool[j]
pool[j] = pool[n - i - 1] # move non-selected item into vacancy
else:
selected = set()
selected_add = selected.add
for i in range(k):
j = randbelow(n)
while j in selected:
j = randbelow(n)
selected_add(j)
result[i] = population[j]
return result
To choose a random subset of multiple combinations
You can keep picking random samples and discarding the ones that were already picked.
def random_subset_of_combos2(iterable, r, k):
"""Returns at most `n` random samples of
`r` length (combinations of) subsequences of elements in `iterable`.
"""
def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(range(n), r))
return tuple(pool[i] for i in indices)
results = set()
while True:
new_combo = random_combination(iterable, r)
if new_combo not in results:
results.add(new_combo)
if len(results) >= min(k, len(iterable)):
break
return results
This seems faster in most cases.
Alternatively, you can assign a index number to each combination, and create a list indexing the targeted number of random samples.
def random_subset_of_combos(iterable, r, k):
"""Returns at most `n` random samples of
`r` length (combinations of) subsequences of elements in `iterable`.
"""
max_combinations = math.comb(len(iterable), min(r, len(iterable)))
k = min(k, max_combinations) # Limit sample size to ...
indexes = set(random.sample(range(max_combinations), k))
max_idx = max(indexes)
results = []
for i, combo in zip(it.count(), it.combinations(iterable, r)):
if i in indexes:
results.append(combo)
elif i > max_idx:
break
return results
Technically, this answer may not be what the OP asked for, but search engines (and at least three SO members) have considered this to be the same question.