Determining All Combinations but With a Grouping Variable

Question

I have the following list of data.

Input <- list(c("1", "2"), c("3", "4"), c("5", "6", "7"))

I want to take one item from each list element and combine them into a vector. Then, from the remaining items in each list element, I'd like to repeat this process, taking another item from each of these list elements and combining them into another vector. I'd like to repeat these steps until I reach some predetermined value (which is 2 in this case; 2 is the maximum number since it happens to be the minimum length of each list element in the Input list).

There are many possible ways to do this, and I'm hoping to find a way to do it that can return every possibility, like this Output list below. I don't really care about the form of the output as long as it contains the same information.

Output <- lapply(list(rbind(as.character(c(1, 3, 5)), as.character(c(2, 4, 6))), rbind(as.character(c(1, 3, 5)), as.character(c(2, 4, 7))), rbind(as.character(c(1, 3, 6)), as.character(c(2, 4, 5))), rbind(as.character(c(1, 3, 6)), as.character(c(2, 4, 7))), rbind(as.character(c(1, 3, 7)), as.character(c(2, 4, 5))), rbind(as.character(c(1, 3, 7)), as.character(c(2, 4, 6))), rbind(as.character(c(1, 4, 5)), as.character(c(2, 3, 6))), rbind(as.character(c(1, 4, 5)), as.character(c(2, 3, 7))), rbind(as.character(c(1, 4, 6)), as.character(c(2, 3, 5))), rbind(as.character(c(1, 4, 6)), as.character(c(2, 3, 7))), rbind(as.character(c(1, 4, 7)), as.character(c(2, 3, 5))), rbind(as.character(c(1, 4, 7)), as.character(c(2, 3, 6)))), function (x) {
  lapply(as.data.frame(t(x)), function (y) {
    y
  })
})

This example is pretty tiny. In practice, I'll probably have more groups (list elements in the Input list) and more elements within each of these groups, and the sizes of the groups may be different like they were in my example. Is there an efficient, programmatic way of doing this operation? I'd love to see a solution using base functions but I'm open to anything. The expand.grid() function doesn't work since it doesn't take into account the grouping variable I have.

Why dont you have `2,1` you only have `1,2` yet you have `3,4` and `4,3`? — Onyambu, May 30 '22 at 18:55
If you mean in the `Output` list, it's because the rows are the vectors I'm creating. I can't have `1` and `2` in the same row in the `Output` list since they are from the same starting group. It doesn't matter which order the rows are in in the `Output` list. I'll update my question to make it clearer. — David Moore, May 30 '22 at 19:06
Could it be that you are just missing the naming feature for variables of the `expand.grid` function? Like: `expand.grid(My_Name_1=L_1, Some_Other_Name=L_2, L_3)` , where L_i are the individual lists? — benjamin, Jun 02 '22 at 20:38

ThomasIsCoding · Accepted Answer · 2022-06-05T21:23:21.613

You could try

lst <- expand.grid(Input)
minlen <- min(lengths(Input))
res <- Filter(
    length,
    combn(
        1:nrow(lst),
        minlen,
        function(x) {
            if (all(!apply(lst[x, ], 2, anyDuplicated))) {
                lst[x, ]
            }
        },
        simplify = FALSE
    )
)

which gives

> res
[[1]]
  Var1 Var2 Var3
1    1    3    5
8    2    4    6

[[2]]
   Var1 Var2 Var3
1     1    3    5
12    2    4    7

[[3]]
  Var1 Var2 Var3
2    2    3    5
7    1    4    6

[[4]]
   Var1 Var2 Var3
2     2    3    5
11    1    4    7

[[5]]
  Var1 Var2 Var3
3    1    4    5
6    2    3    6

[[6]]
   Var1 Var2 Var3
3     1    4    5
10    2    3    7

[[7]]
  Var1 Var2 Var3
4    2    4    5
5    1    3    6

[[8]]
  Var1 Var2 Var3
4    2    4    5
9    1    3    7

[[9]]
   Var1 Var2 Var3
5     1    3    6
12    2    4    7

[[10]]
   Var1 Var2 Var3
6     2    3    6
11    1    4    7

[[11]]
   Var1 Var2 Var3
7     1    4    6
10    2    3    7

[[12]]
  Var1 Var2 Var3
8    2    4    6
9    1    3    7

`res` here doesn't quite look like the output (`Output`) I'm looking for above - is there something I'm missing? — David Moore, Jun 03 '22 at 00:01

GKi · Answer 2 · 2022-06-04T16:50:48.240

Get the minimum length of Input n.
Get the first element of Input with the minimum length i.
Create a list x with all unique permutations of size n per Input.
Take only one combination of element i.
expand.grid for x to get the result.

i <- which.min(lengths(Input))
n <- length(Input[[i]])
x <- lapply(Input, \(x) {
  . <- do.call(expand.grid, rep(list(x), n))
  asplit(.[!apply(., 1, anyDuplicated),], 1)
})
x[[i]]  <- x[[i]][1]
y <- do.call(expand.grid, x)
y
#   Var1 Var2 Var3
#1  2, 1 4, 3 6, 5
#2  2, 1 3, 4 6, 5
#3  2, 1 4, 3 7, 5
#4  2, 1 3, 4 7, 5
#5  2, 1 4, 3 5, 6
#6  2, 1 3, 4 5, 6
#7  2, 1 4, 3 7, 6
#8  2, 1 3, 4 7, 6
#9  2, 1 4, 3 5, 7
#10 2, 1 3, 4 5, 7
#11 2, 1 4, 3 6, 7
#12 2, 1 3, 4 6, 7

#Or more in a format like given in the question
apply(y, 1, \(.) {matrix(unlist(.), length(.[[1]]))}, simplify = FALSE)
#[[1]]
#     [,1] [,2] [,3]
#[1,] "2"  "4"  "6" 
#[2,] "1"  "3"  "5" 
#
#[[2]]
#     [,1] [,2] [,3]
#[1,] "2"  "3"  "6" 
#[2,] "1"  "4"  "5" 
#
#[[3]]
#     [,1] [,2] [,3]
#[1,] "2"  "4"  "7" 
#[2,] "1"  "3"  "5" 
#
#[[4]]
#     [,1] [,2] [,3]
#[1,] "2"  "3"  "7" 
#[2,] "1"  "4"  "5" 
#
#[[5]]
#     [,1] [,2] [,3]
#[1,] "2"  "4"  "5" 
#[2,] "1"  "3"  "6" 
#
#[[6]]
#     [,1] [,2] [,3]
#[1,] "2"  "3"  "5" 
#[2,] "1"  "4"  "6" 
#
#[[7]]
#     [,1] [,2] [,3]
#[1,] "2"  "4"  "7" 
#[2,] "1"  "3"  "6" 
#
#[[8]]
#     [,1] [,2] [,3]
#[1,] "2"  "3"  "7" 
#[2,] "1"  "4"  "6" 
#
#[[9]]
#     [,1] [,2] [,3]
#[1,] "2"  "4"  "5" 
#[2,] "1"  "3"  "7" 
#
#[[10]]
#     [,1] [,2] [,3]
#[1,] "2"  "3"  "5" 
#[2,] "1"  "4"  "7" 
#
#[[11]]
#     [,1] [,2] [,3]
#[1,] "2"  "4"  "6" 
#[2,] "1"  "3"  "7" 
#
#[[12]]
#     [,1] [,2] [,3]
#[1,] "2"  "3"  "6" 
#[2,] "1"  "4"  "7"

The part generating the permutations could be improved e.g. by using gtools::permutations.

Benchmark:

Input <- list(c("1", "2"), c("3", "4"), c("5", "6", "7"))
bench::mark(check=FALSE,
  TIC = {
    lst <- expand.grid(Input)
    Filter(
      length,
      combn(
        1:nrow(lst),
        min(lengths(Input)),
        function(x) {
          if (all(colSums(apply(lst[x, ], 2, duplicated)) == 0)) {
            lst[x, ]
          }
        },
        simplify = FALSE
      )
    )
  } ,
  GKi = {
    n <- min(lengths(Input))
    i <- match(n, lengths(Input))
    x <- lapply(Input, \(x) {
      . <- do.call(expand.grid, rep(list(x), n))
      asplit(.[!apply(., 1, anyDuplicated),], 1)
    })
  x[[i]]  <- x[[i]][1]
  do.call(expand.grid, x) #Here we have the result
}
)
#  expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc
#  <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>
#1 TIC          8.66ms   9.78ms      102.    37.6KB     32.9    37    12
#2 GKi         644.1µs 676.21µs     1345.      19KB     25.7   628    12

@GKi is in this example about 10 times faster and uses less memory than @ThomasIsCoding.

I just posted a similar question at https://stackoverflow.com/questions/72912733/determining-all-possible-combinations-of-items-with-a-grouping-variable-allowin - any help is greatly appreciated! — David Moore, Jul 08 '22 at 14:17

Determining All Combinations but With a Grouping Variable

2 Answers2

Linked