5

I have two vectors and I am trying to find all unique combinations of 3 elements from vector1 and 2 elements from vector2. I have tried the following code.

# combn() returns a matrix with one combination per column.
V1 <- combn(x = 1:5, m = 3)   # 10 combinations in total
V2 <- combn(x = 6:11, m = 2)  # 15 combinations in total

How can I combine V1 and V2 so that there are 10 * 15 = 150 combinations in total? Thanks.

Yang Yang
  • 858
  • 3
  • 26
  • 49

5 Answers

6

The function comboGrid from RcppAlgos (I am the author) does just the trick:

library(RcppAlgos)

# Five pools are passed: 1:5 three times and 6:11 twice. The printed result
# has 150 rows, one per choice of 3 values from 1:5 and 2 values from 6:11.
grid <- comboGrid(c(rep(list(1:5), 3), rep(list(6:11), 2)),
                  repetition = FALSE)

head(grid)
#>      Var1 Var2 Var3 Var4 Var5
#> [1,]    1    2    3    6    7
#> [2,]    1    2    3    6    8
#> [3,]    1    2    3    6    9
#> [4,]    1    2    3    6   10
#> [5,]    1    2    3    6   11
#> [6,]    1    2    3    7    8

tail(grid)
#>        Var1 Var2 Var3 Var4 Var5
#> [145,]    3    4    5    8    9
#> [146,]    3    4    5    8   10
#> [147,]    3    4    5    8   11
#> [148,]    3    4    5    9   10
#> [149,]    3    4    5    9   11
#> [150,]    3    4    5   10   11

It is quite efficient as well. It is written in C++ and pulls together many ideas from the excellent question: Picking unordered combinations from pools with overlap. The underlying algorithm avoids generating duplicates that would need to be filtered out.

Consider the following example, where the full Cartesian product would contain more than 10 billion rows:

# Larger case: five pools of 1:20 and three pools of 21:35. The plain
# Cartesian product of these pools would have 20^5 * 15^3 (about 1.08e10) rows.
system.time(huge <- comboGrid(c(rep(list(1:20), 5), rep(list(21:35), 3)),
                              repetition = FALSE))
#>    user  system elapsed 
#>   0.990   0.087   1.077

dim(huge)
#> [1] 7054320       8
Joseph Wood
  • 7,077
  • 2
  • 30
  • 65
6

You can try expand.grid along with asplit, e.g.,

# Split each matrix into a list of its columns, then pair every column of V1
# with every column of V2 (one pair per row of the resulting data frame).
expand.grid(asplit(V1, MARGIN = 2), asplit(V2, MARGIN = 2))

or

# Pair up the columns of V1 and V2 as list-columns Var1 / Var2, glue each
# pair into a single vector, and transpose so each combination is one row.
with(
  expand.grid(asplit(V1, MARGIN = 2), asplit(V2, MARGIN = 2)),
  t(mapply(function(left, right) c(left, right), Var1, Var2))
)
ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81
  • Hi Thomasls, this solution is very efficient, however, the final product is a dataframe that includes 150 rows and 2 columns. Is it possible to split the 2 columns into 5 columns as there are 5 elements in each row? Thank you. – Yang Yang Dec 03 '22 at 04:42
  • 1
    @YangYang Try the second option. – ThomasIsCoding Dec 03 '22 at 07:54
3

You can use expand.grid():

# Each row of g is one (column index into V1, column index into V2) pair.
g <- expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2)))
# Select the indexed columns from both matrices and stack them vertically.
V3 <- rbind(V1[, g[[1]]], V2[, g[[2]]])

The result is in a similar format as V1 and V2, i.e. a 5 × 150 matrix (here printed transposed):

head(t(V3))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    2    3    6    7
# [2,]    1    2    4    6    7
# [3,]    1    2    5    6    7
# [4,]    1    3    4    6    7
# [5,]    1    3    5    6    7
# [6,]    1    4    5    6    7

dim(unique(t(V3)))
# [1] 150   5

And a generalized approach that can handle more than two initial matrices of combinations, stored in a list V:

# V may hold any number of combination matrices (one combination per column).
V <- list(V1, V2)
# One index column per matrix; each row of g picks one column from every matrix.
g <- do.call(expand.grid, lapply(V, \(x) seq_len(ncol(x))))
# For each matrix, take all rows (TRUE) and the indexed columns.
# SIMPLIFY = FALSE keeps the result a list even when every matrix has the same
# dimensions; the default would collapse it into a single matrix and
# do.call() would then fail. TRUE is spelled out because T can be reassigned.
V.comb <- do.call(rbind, mapply("[", V, TRUE, g, SIMPLIFY = FALSE))

identical(V.comb, V3)
# [1] TRUE
Robert Hacken
  • 3,878
  • 1
  • 13
  • 15
3

After some helpful refactoring guidance from @onyambu, here is a shorter solution based on base::merge():

# With no key columns (by = NULL), merge() returns the Cartesian product of
# the rows of both inputs.
merge(
  x = t(combn(1:5, 3)),
  y = t(combn(6:11, 2)),
  by = NULL
)

...and the first 20 rows of output:

> merge(t(combn(1:5, 3)),t(combn(6:11, 2)),by.x=NULL,by.y = NULL)
    V1.x V2.x V3 V1.y V2.y
1      1    2  3    6    7
2      1    2  4    6    7
3      1    2  5    6    7
4      1    3  4    6    7
5      1    3  5    6    7
6      1    4  5    6    7
7      2    3  4    6    7
8      2    3  5    6    7
9      2    4  5    6    7
10     3    4  5    6    7
11     1    2  3    6    8
12     1    2  4    6    8
13     1    2  5    6    8
14     1    3  4    6    8
15     1    3  5    6    8
16     1    4  5    6    8
17     2    3  4    6    8
18     2    3  5    6    8
19     2    4  5    6    8
20     3    4  5    6    8

original solution

A base R solution to create a Cartesian product with merge() looks like this:

# One combination per row: 3 values from 1:5 and 2 values from 6:11.
df1 <- data.frame(t(combn(1:5, 3)))
df2 <- data.frame(t(combn(6:11, 2)))
# Rename df2's columns so they do not clash with df1's X1, X2.
colnames(df2) <- paste0("y", 1:2)

# No key columns on either side, so merge() yields the Cartesian product.
merge(df1, df2, by.x = NULL, by.y = NULL)

...and the first 25 rows of output:

> merge(df1,df2,by.x=NULL,by.y = NULL)
    X1 X2 X3 y1 y2
1    1  2  3  6  7
2    1  2  4  6  7
3    1  2  5  6  7
4    1  3  4  6  7
5    1  3  5  6  7
6    1  4  5  6  7
7    2  3  4  6  7
8    2  3  5  6  7
9    2  4  5  6  7
10   3  4  5  6  7
11   1  2  3  6  8
12   1  2  4  6  8
13   1  2  5  6  8
14   1  3  4  6  8
15   1  3  5  6  8
16   1  4  5  6  8
17   2  3  4  6  8
18   2  3  5  6  8
19   2  4  5  6  8
20   3  4  5  6  8
21   1  2  3  6  9
22   1  2  4  6  9
23   1  2  5  6  9
24   1  3  4  6  9
25   1  3  5  6  9
Len Greski
  • 10,505
  • 2
  • 22
  • 33
  • 1
    this is a good solution, make it better: No need of the `df1` `df2`. Just do `merge(t(V1), t(V2), by=NULL)` – Onyambu Dec 02 '22 at 22:52
2

Similar idea, using apply

# Enumerate every (column of V1, column of V2) index pair and concatenate the
# two selected columns. seq_len() gives an empty index set for a zero-column
# matrix, whereas seq() would produce c(1, 0).
apply(expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2))), 1, function(idx) {
  c(V1[, idx[1]], V2[, idx[2]])
})
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,]    1    1    1    1    1    1    2    2    2     3     1     1     1     1
#> [2,]    2    2    2    3    3    4    3    3    4     4     2     2     2     3
#> [3,]    3    4    5    4    5    5    4    5    5     5     3     4     5     4
#> [4,]    6    6    6    6    6    6    6    6    6     6     6     6     6     6
#> [5,]    7    7    7    7    7    7    7    7    7     7     8     8     8     8
#>      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     8     8     8     8     8     8     9     9     9     9     9     9
#>      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     9     9     9     9    10    10    10    10    10    10    10    10
#>      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
#> [1,]     2     3     1     1     1     1     1     1     2     2     2     3
#> [2,]     4     4     2     2     2     3     3     4     3     3     4     4
#> [3,]     5     5     3     4     5     4     5     5     4     5     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]    10    10    11    11    11    11    11    11    11    11    11    11
#>      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
#> [1,]     1     1     1     1     1     1     2     2     2     3     1     1
#> [2,]     2     2     2     3     3     4     3     3     4     4     2     2
#> [3,]     3     4     5     4     5     5     4     5     5     5     3     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     8     8     8     8     8     8     8     8     8     8     9     9
#>      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
#> [1,]     1     1     1     1     2     2     2     3     1     1     1     1
#> [2,]     2     3     3     4     3     3     4     4     2     2     2     3
#> [3,]     5     4     5     5     4     5     5     5     3     4     5     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     9     9     9     9     9     9     9     9    10    10    10    10
#>      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]    10    10    10    10    10    10    11    11    11    11    11    11
#>      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     7     7     7     7     8     8     8     8     8     8     8     8
#> [5,]    11    11    11    11     9     9     9     9     9     9     9     9
#>      [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
#> [1,]     2      3      1      1      1      1      1      1      2      2
#> [2,]     4      4      2      2      2      3      3      4      3      3
#> [3,]     5      5      3      4      5      4      5      5      4      5
#> [4,]     8      8      8      8      8      8      8      8      8      8
#> [5,]     9      9     10     10     10     10     10     10     10     10
#>      [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      8      8      8      8      8      8      8      8
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      9      9      9      9      9      9      9      9
#> [5,]     11     11     10     10     10     10     10     10     10     10
#>      [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9      9      9      9      9      9      9      9      9
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9     10     10     10     10     10     10     10     10
#> [5,]     11     11     11     11     11     11     11     11     11     11
#>      [,149] [,150]
#> [1,]      2      3
#> [2,]      4      4
#> [3,]      5      5
#> [4,]     10     10
#> [5,]     11     11

Created on 2022-12-02 with reprex v2.0.2

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87