7

Say I have a list L. How can I get an iterator over all partitions of K groups?

Example: L = [ 2,3,5,7,11, 13], K = 3

List of all possible partitions of 3 groups:

[ [ 2 ], [ 3, 5], [ 7,11,13] ]
[ [ 2,3,5 ], [ 7, 11], [ 13] ]
[ [ 3, 11 ], [ 5, 7], [ 2, 13] ]
[ [ 3 ], [ 11 ], [ 5, 7, 2, 13] ]
etc...

=== UPDATE ===

I was working on a solution which seems to be working, so I will just copy paste it

# -*- coding: utf-8 -*-

import itertools 

# return ( list1 - list0 )
def l1_sub_l0( l1, l0 ) :
    """Multiset difference of two lists.

    Returns [elements of l1 not shared with l0,
             elements of l0 not shared with l1],
    each preserving the original order of the surviving elements.
    Shared elements are matched one occurrence at a time.
    """
    remaining_l1 = list( l1 )
    remaining_l0 = list( l0 )

    for item in l0 :
        # cancel one shared occurrence from each side
        if item in remaining_l1 :
            remaining_l1.remove( item )
            remaining_l0.remove( item )

    return [ remaining_l1, remaining_l0 ]


#
def gen_group_len( n, k ) :
    """Generate all possible group sizes.

    Yields every non-decreasing tuple of k positive integers that sums
    to n, each exactly once.

    Fixes vs. the original:
    - `xrange(1, n - 1)` missed valid configurations: gen_group_len(2, 2)
      yielded nothing even though (1, 1) is valid.  `range(1, n)` covers
      every feasible size (a single group can hold up to n - 1 elements
      when k >= 2).
    - `range` (not `xrange`) keeps this working on both Python 2 and 3.
    - dedup uses a set (O(1) membership) instead of a list.
    """
    seen = set()
    # choose the first k-1 sizes with repetition; the last size is forced
    for t in itertools.combinations_with_replacement( range( 1, n ), k - 1 ) :
        last_n = n - sum( t )

        # valid only if the forced last group is non-empty
        if last_n >= 1 :
            res = tuple( sorted( t + ( last_n, ) ) )
            if res not in seen :
                yield res
                seen.add( res )


# group_len = (1, 1, 3)

def gen( group_len, my_list ) :
    """Generate all possible partitions of all possible group sizes.

    Recursively yields every partition of my_list whose group sizes match
    group_len (a tuple of sizes; assumed non-decreasing, as produced by
    gen_group_len -- equal sizes must be adjacent for the dedup below to
    work).  Each partition is yielded as a tuple of tuples.
    """

    # base case: a single group takes everything that remains
    if len( group_len ) == 1 :
        yield ( tuple( my_list ), )

    #
    else :

        # need for a stop list if 2 groups of same size:
        # when the first two groups have equal size, swapping them would
        # yield the same partition twice; remember seen unordered pairs
        stop_list = []

        # choose the first group among all combinations of the right size
        for t in itertools.combinations( my_list, group_len[ 0 ] ) :
            # elements of my_list not consumed by the first group
            reduced_list = l1_sub_l0( my_list, t )[ 0 ]

            # recursively partition the remainder into the other groups
            for t2 in gen( group_len[ 1: ], reduced_list ) :
                # unordered pair {first group, second group}
                tmp = set( ( t, t2[ 0 ] ) )
                #
                if tmp not in stop_list :
                    yield ( t, ) + t2
                    # avoid doing same thing twice
                    if group_len[ 1 ] == group_len[ 0 ] :
                        stop_list.append( tmp )


#
# demo input
my_list = [ 3,5,7,11,13 ]
n = len( my_list )
k = 3

# enumerate every valid tuple of group sizes for n elements in k groups
group_len_list = list( gen_group_len( n, k ) )
print "for %i elements, %i configurations of group sizes" % ( n, len(  group_len_list ) )
print group_len_list

# for each size configuration, print every partition
# (NOTE: Python 2 print-statement syntax)
for group_len in group_len_list :
    #
    print "group sizes", group_len
    #
    for x in gen( group_len, my_list ) :
        print x
    #
    print "==="

Output:

for 5 elements, 2 configurations of group sizes
[(1, 1, 3), (1, 2, 2)]
group sizes (1, 1, 3)
((3,), (5,), (7, 11, 13))
((3,), (7,), (5, 11, 13))
((3,), (11,), (5, 7, 13))
((3,), (13,), (5, 7, 11))
((5,), (7,), (3, 11, 13))
((5,), (11,), (3, 7, 13))
((5,), (13,), (3, 7, 11))
((7,), (11,), (3, 5, 13))
((7,), (13,), (3, 5, 11))
((11,), (13,), (3, 5, 7))
===
group sizes (1, 2, 2)
((3,), (5, 7), (11, 13))
((3,), (5, 11), (7, 13))
((3,), (5, 13), (7, 11))
((5,), (3, 7), (11, 13))
((5,), (3, 11), (7, 13))
((5,), (3, 13), (7, 11))
((7,), (3, 5), (11, 13))
((7,), (3, 11), (5, 13))
((7,), (3, 13), (5, 11))
((11,), (3, 5), (7, 13))
((11,), (3, 7), (5, 13))
((11,), (3, 13), (5, 7))
((13,), (3, 5), (7, 11))
((13,), (3, 7), (5, 11))
((13,), (3, 11), (5, 7))
===
Has QUIT--Anony-Mousse
  • 76,138
  • 12
  • 138
  • 194
usual me
  • 8,338
  • 10
  • 52
  • 95
  • 1
    And what have you tried so far? – Paolo Casciello Aug 21 '13 at 09:32
  • See also: [Finding all k-subset partitions](http://codereview.stackexchange.com/q/1526/4433) – Martin Thoma Apr 02 '15 at 11:49
  • Note that the algorithm described in [Finding all k-subset partitions](https://codereview.stackexchange.com/questions/1526/finding-all-k-subset-partitions) returns all _non-empty_ subsets. Since the OP didn't mention this to be a constraint, I assume that algorithm would not serve his purposes. – Kurt Peek Sep 22 '17 at 11:53
  • 1
    Do you consider `((3,), (5,), (7, 11, 13))` and `((7, 11, 13)), (3,), (5,))` the same? – pylang Jun 11 '19 at 18:45

5 Answers5

3

This works, although it is probably super inefficient (I sort them all to avoid double-counting):

def clusters(l, K):
    """Yield all ways to distribute the elements of l over K (possibly
    empty) ordered groups.

    Recursive scheme: partition the tail l[1:], canonicalize each
    partition by sorting its groups (so consecutive duplicates can be
    skipped via `prev`), then insert l[0] into each of the K groups in
    turn.

    Fix vs. original: `range` instead of `xrange`, so the code runs on
    Python 3 as well (identical behaviour on Python 2).
    """
    if l:
        prev = None
        for t in clusters(l[1:], K):
            tup = sorted(t)
            if tup != prev:  # skip a partition identical to the previous one
                prev = tup
                for i in range(K):
                    # place the head element into group i
                    yield tup[:i] + [[l[0]] + tup[i]] + tup[i + 1:]
    else:
        # no elements left: one partition made of K empty groups
        yield [[] for _ in range(K)]

It also returns empty clusters, so you would probably want to wrap this in order to get only the non-empty ones:

def neclusters(l, K):
    """Same as clusters(), but keep only partitions with no empty group."""
    for partition in clusters(l, K):
        if all(partition):
            yield partition

Counting just to check:

def kamongn(n, k):
    """Binomial coefficient C(n, k), using integer arithmetic only.

    Fix vs. original: `//=` instead of `/=`, which on Python 3 produced a
    float.  Floor division is exact here because a product of j
    consecutive integers is always divisible by j!.  `range` replaces
    `xrange` for Python 3 compatibility.
    """
    res = 1
    # falling factorial n * (n-1) * ... * (n-k+1)
    for x in range(n - k, n):
        res *= x + 1
    # divide by k!, one factor at a time (each quotient is an integer)
    for x in range(k):
        res //= x + 1
    return res

def Stirling(n, k):
    """Stirling number of the second kind S(n, k): the number of ways to
    partition n labelled elements into k non-empty unlabelled groups.

    Inclusion-exclusion: S(n, k) = (1/k!) * sum_j (-1)^(k-j) C(k,j) j^n.
    Same `//=` fix as kamongn: the accumulated sum equals k! * S(n, k),
    so dividing successively by 1..k stays exact.
    """
    res = 0
    for j in range(k + 1):
        res += (-1) ** (k - j) * kamongn(k, j) * j ** n
    for x in range(k):
        res //= x + 1
    return res

>>> sum(1 for _ in neclusters([2,3,5,7,11,13], K=3)) == Stirling(len([2,3,5,7,11,13]), k=3)
True

It works !

The output:

>>> clust = neclusters([2,3,5,7,11,13], K=3)
>>> [clust.next() for _ in xrange(5)]
[[[2, 3, 5, 7], [11], [13]], [[3, 5, 7], [2, 11], [13]], [[3, 5, 7], [11], [2, 13]], [[2, 3, 11], [5, 7], [13]], [[3, 11], [2, 5, 7], [13]]]
val
  • 8,459
  • 30
  • 34
  • Glad I could help ! I think if your list is sorted, you can probably work around the whole `sorted`/`if tup != prev`... part and generate only the necessary ones. But this is left as an exercise to the reader :) – val Aug 21 '13 at 13:19
3

Filter partitions of size k using more_itertools.partitions (note the trailing "s"):

Given

import itertools as it

import more_itertools as mit


iterable = [2, 3, 5, 7, 11]
k = 3

Demo

res = [p for perm in it.permutations(iterable) for p in mit.partitions(perm) if len(p) == k]
len(res)
# 720

res
# [[[2], [3], [5, 7, 11]],
#  [[2], [3, 5], [7, 11]],
#  [[2], [3, 5, 7], [11]],
#  [[2, 3], [5], [7, 11]],
#  [[2, 3], [5, 7], [11]],
#  [[2, 3, 5], [7], [11]],
#  ...
#  [[3], [2], [5, 7, 11]],
#  [[3], [2, 5], [7, 11]],
#  [[3], [2, 5, 7], [11]],
#  [[3, 2], [5], [7, 11]],
#  [[3, 2], [5, 7], [11]],
#  [[3, 2, 5], [7], [11]],
#  [[3], [2], [5, 11, 7]],
#  ...
# ]

This version gives partitions of a permuted input. Duplicate partitions that differ only in element or group order may be included, e.g. [[3], [5], [7, 11, 13]] and [[7, 11, 13], [3], [5]].

Note: more_itertools is a third-party package. Install via > pip install more_itertools.

pylang
  • 40,867
  • 14
  • 129
  • 121
  • If I am understanding the OP's question correctly, this is a good answer for [this other question](https://stackoverflow.com/q/23596702/3780389), but gives the wrong answer for this question, since it constrains subsets to be contiguous with respect to the original order. (e.g. your list doesn't include the any partitions with the subset [3,11] in it because 3 and 11 are not adjacent in the original list). – teichert Jun 11 '19 at 17:32
  • 1
    That is a sound observation. Yes this version of `partitions` returns partitions from contiguous elements. We can remedy this using `itertools.permutations`. See update. – pylang Jun 11 '19 at 18:51
2

A simple alternative view of this problem is the assignment of one of the three cluster labels to each element.

import itertools
def neclusters(l, k):
    """Yield every assignment of the elements of l to k ordered groups.

    Each element independently receives one of k labels, so k**len(l)
    partitions are produced -- including partitions with empty groups and
    duplicates that differ only in group order.
    """
    for labels in itertools.product(range(k), repeat=len(l)):
        groups = [[] for _ in range(k)]
        for element, label in zip(l, labels):
            groups[label].append(element)
        yield groups

as with @val's answer, this can be wrapped to remove partitions with empty clusters.

joeln
  • 3,563
  • 25
  • 31
  • 1
    This solution includes duplicate partitions, but if you filter out the empty ones, sort each partition, and then remove duplicates, you will be left with all of the partitions up to the given number of subsets: e.g. `n=5; set(map((lambda p: tuple(sorted(map(tuple, filter(len, p))))), neclusters(range(n), n)))` ([52 distinct partitions in this case](https://oeis.org/A000110)) – teichert Jun 11 '19 at 18:09
1

Edited: As noted by @moose, the following only determines partitions where contiguous indices are in the same cluster. Performing this partitioning over all permutations will give the sought answer.

Itertools is very useful for this sort of combinatoric listing. First, we consider your task as the equivalent problem of selecting all sets of k-1 distinct split points in the array. This is solved by itertools.combinations, which returns combinations without replacement of a particular size from a given iterable, and the values it returns are in the order that they are found in the original iterable.

Your problem is thus solved by the following:

import itertools
def neclusters(l, K):
    """Yield all partitions of l into K non-empty contiguous slices.

    Choosing K-1 distinct split points among the len(l)-1 gaps between
    consecutive elements determines exactly one such partition.
    """
    for splits in itertools.combinations(range(len(l) - 1), K - 1):
        # turn gap indices into slice boundaries: 0, s+1 ..., end
        bounds = [0] + [s + 1 for s in splits] + [None]
        pieces = []
        for start, end in zip(bounds, bounds[1:]):
            pieces.append(l[start:end])
        yield pieces

numpy's split function is designed to make these sorts of partitions given split offsets, so here's an alternative that generates lists of numpy arrays:

import itertools
def neclusters(l, K):
    """Yield all partitions of l into K non-empty contiguous pieces, as
    lists of numpy arrays.

    np.split cuts l at the chosen split offsets (shifted by one so every
    piece is non-empty).

    Fix vs. original: numpy was used as `np` but never imported, so the
    snippet raised NameError on first call.
    """
    import numpy as np
    for splits in itertools.combinations(range(len(l) - 1), K - 1):
        yield np.split(l, 1 + np.array(splits))
Glorfindel
  • 21,988
  • 13
  • 81
  • 109
joeln
  • 3,563
  • 25
  • 31
  • This seems to be wrong. It only finds partitions which don't change the order of elements. But for `l=[0,1,2]` and `K=2` it does not find `[[0,2],[1]]`. – Martin Thoma Apr 02 '15 at 11:33
  • Actually, his answer is the only correct. The question is asking for a list partitions, not set partitions. – Rok Kralj Jan 09 '16 at 13:55
  • It looks like the examples given by the OP are set partitions (even though the input is a list). (e.g. [ 3, 11 ], [ 5, 7], [ 2, 13] ]) – teichert Jun 11 '19 at 17:34
0

A reasonably efficient way is to pivot on the first element in each recursion to force uniqueness, and simply go through combinations of all increasing sizes up to the point it would give empty subsets.

def kpartitions(l, k):
  """Yield all partitions of list l into exactly k non-empty groups.

  Uniqueness is forced by pivoting on l[0]: the first group always
  contains the first element, plus i-1 others chosen by combination;
  the remaining elements are recursively split into k-1 groups.

  Fix vs. original: the original iterated itertools.combinations over a
  `set` and built the remainder from a set difference, so the output
  order relied on CPython's small-int set iteration order.  Iterating a
  range and filtering keeps the results identical but deterministically
  ordered on any implementation.
  """
  import itertools
  if k == 1:
    # base case: one group takes everything
    yield [l]
    return
  rest = range(1, len(l))
  # first group size i runs from 1 to len(l)-k+1 (leave >=1 per group)
  for i in range(1, len(l) - k + 2):
    for comb in itertools.combinations(rest, i - 1):
      chosen = set(comb)
      first_group = [l[0]] + [l[idx] for idx in comb]
      remainder = [l[idx] for idx in rest if idx not in chosen]
      for tail in kpartitions(remainder, k - 1):
        yield [first_group, *tail]
def stirlingsecond(n, k):
  """Stirling number of the second kind S(n, k) via inclusion-exclusion:
  S(n, k) = (1/k!) * sum_i (-1)^i C(k, i) (k-i)^n.
  """
  import math
  total = 0
  for i in range(k + 1):
    sign = -1 if i % 2 else 1
    total += sign * math.comb(k, i) * (k - i) ** n
  return total // math.factorial(k)
# Sanity checks: the number of generated k-partitions must equal the
# Stirling number of the second kind S(n, k).
assert len(list(kpartitions([3,5,7,11,13], 3))) == stirlingsecond(5, 3)
assert len(list(kpartitions([2,3,5,7,11,13], 3))) == stirlingsecond(6, 3)

This is quite efficient, though it does a little extra work to find the elements not in each combination. itertools.combinations is convenient here, but writing a combination function that yields both the combination and the elements not in it might give a constant-factor improvement.

Gregory Morse
  • 369
  • 2
  • 10