3

I have a list of customer IDs, each with a list of unique products they used. There can theoretically be up to ~150 unique products.

# Example input: one row per (customer ID, product) pair.
df <- tibble(
  ID = c(1, 1, 1, 2, 2, 3, 3, 4),
  prod = c(
    "Prod1", "Prod2", "Prod3", # ID 1
    "Prod1", "Prod4",          # ID 2
    "Prod3", "Prod5",          # ID 3
    "Prod2"                    # ID 4
  )
)

From that, I need to get all possible combinations of products for each ID, not only on the highest level (grouped by ID). That is, include the combination with all products, as expand_grid() would do, but also all combinations of 1,...,n elements, where n is the number of unique products the ID has.

The final dataset should therefore look like this:

# Expected output: for every ID, each non-empty subset of its products,
# with the members of a subset joined by "|".
df_results <- tibble(
  ID = c(
    1, 1, 1, 1, 1, 1, 1,
    2, 2, 2,
    3, 3, 3,
    4
  ),
  combo = c(
    # ID 1
    "Prod1", "Prod2", "Prod3",
    "Prod1|Prod2", "Prod1|Prod3", "Prod2|Prod3",
    "Prod1|Prod2|Prod3",
    # ID 2
    "Prod1", "Prod4", "Prod1|Prod4",
    # ID 3
    "Prod3", "Prod5", "Prod3|Prod5",
    # ID 4
    "Prod2"
  )
)
ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81
Jirka Čep
  • 181
  • 7

3 Answers

6

An extension of the canonical answer:

library(dplyr)
# For every ID, list each non-empty subset of its products, one row per
# subset, with the subset's members joined by "|".
df %>%
  group_by(ID) %>%
  reframe(
    combo = unlist(
      lapply(
        seq_along(prod),
        # combn() applies FUN to each size-k subset of `prod`.
        function(k) combn(prod, k, FUN = paste, collapse = "|")
      )
    )
  )
# A tibble: 14 × 2
      ID combo            
   <dbl> <chr>            
 1     1 Prod1            
 2     1 Prod2            
 3     1 Prod3            
 4     1 Prod1|Prod2      
 5     1 Prod1|Prod3      
 6     1 Prod2|Prod3      
 7     1 Prod1|Prod2|Prod3
 8     2 Prod1            
 9     2 Prod4            
10     2 Prod1|Prod4      
11     3 Prod3            
12     3 Prod5            
13     3 Prod3|Prod5      
14     4 Prod2           

Or in base R:

# Base R: build the "|"-joined subsets per ID with tapply(), stack() the
# resulting list into a two-column data frame, then swap the columns so the
# group label comes first.
stack(
  tapply(
    df$prod,
    df$ID,
    function(items) {
      unlist(
        lapply(
          seq_along(items),
          function(k) combn(items, k, FUN = paste, collapse = "|")
        )
      )
    }
  )
)[2:1]
Maël
  • 45,206
  • 3
  • 29
  • 67
2

Another tidyverse option could be:

# For every ID, build one list element per subset size (1..n) holding all
# "|"-joined product subsets of that size, then unnest to one row per subset.
df %>%
 group_by(ID) %>%
 # list(prod) has length 1, so map2() recycles it against each subset size.
 transmute(combo = map2(.x = list(prod), 
                        .y = seq_along(prod),
                        ~ combn(.x, .y, FUN = paste, collapse = "|"))) %>%
 unnest_longer(combo) %>%
 # Drop the ID grouping so the result is a plain, ungrouped tibble.
 ungroup()

      ID combo            
   <dbl> <chr>            
 1     1 Prod1            
 2     1 Prod2            
 3     1 Prod3            
 4     1 Prod1|Prod2      
 5     1 Prod1|Prod3      
 6     1 Prod2|Prod3      
 7     1 Prod1|Prod2|Prod3
 8     2 Prod1            
 9     2 Prod4            
10     2 Prod1|Prod4      
11     3 Prod3            
12     3 Prod5            
13     3 Prod3|Prod5      
14     4 Prod2  
tmfmnk
  • 38,881
  • 4
  • 47
  • 67
1

Here is another base R option that uses intToBits to enumerate all combinations: each integer from 1 to 2^n - 1 is read in binary, and its set bits give the indices of the products to include

# Enumerate every non-empty subset of each ID's products by counting k from
# 1 to 2^n - 1 and using the set bits of k as a membership mask over the n
# products. intToBits() yields 32 bits per integer, so this is only usable
# for IDs with a modest number of products.
with(
  df,
  # Restore the input column names after stacking and reordering.
  setNames(
    # stack() returns (values, ind); rev() puts the ID column first.
    rev(
      stack(
        by(
          # The column is `prod` in df; R names are case-sensitive.
          prod, ID,
          function(p) {
            vapply(
              seq_len(2^length(p) - 1),
              function(k) paste0(p[which(intToBits(k) > 0)], collapse = "|"),
              character(1)
            )
          }
        )
      )
    ), names(df)
  )
)

which gives

   ID              Prod
1   1             Prod1
2   1             Prod2
3   1       Prod1|Prod2
4   1             Prod3
5   1       Prod1|Prod3
6   1       Prod2|Prod3
7   1 Prod1|Prod2|Prod3
8   2             Prod1
9   2             Prod4
10  2       Prod1|Prod4
11  3             Prod3
12  3             Prod5
13  3       Prod3|Prod5
14  4             Prod2

If you want to explore the possibility of using expand.grid (which I do NOT recommend, since it is rather inefficient), you can try the code below

# Brute-force variant: for an ID with n products, expand.grid() builds all
# n^n ordered n-tuples (with repetition); each tuple is reduced to its sorted
# set of distinct products, and duplicates are dropped. Inefficient by design.
with(
  df,
  # Restore the input column names after stacking and reordering.
  setNames(
    # stack() returns (values, ind); rev() puts the ID column first.
    rev(
      stack(
        lapply(
          # The column is `prod` in df; R names are case-sensitive.
          split(prod, ID),
          function(x) {
            unique(
              apply(
                expand.grid(rep(list(x), length(x))),
                1,
                function(v) {
                  # Collapse a tuple to its distinct members in sorted order.
                  paste0(sort(unique(v)), collapse = "|")
                }
              )
            )
          }
        )
      )
    ), names(df)
  )
)

which gives

   ID              Prod
1   1             Prod1
2   1       Prod1|Prod2
3   1       Prod1|Prod3
4   1 Prod1|Prod2|Prod3
5   1             Prod2
6   1       Prod2|Prod3
7   1             Prod3
8   2             Prod1
9   2       Prod1|Prod4
10  2             Prod4
11  3             Prod3
12  3       Prod3|Prod5
13  3             Prod5
14  4             Prod2
ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81