1

In the example df, I want to move the "group" variable with 5 levels to 5 different variables, and determine which group they belong in. (I know it's kind of weird formatting, but that's easier for future calculation and analysis)

Example df:

|x   |group   |date          |
|1   |1       |2021-01-01    |
|1   |1       |2021-01-02    |
|1   |1       |2021-01-03    |
|1   |2       |2021-01-10    |
|1   |2       |2021-01-11    |
|1   |3       |2021-01-20    |
|1   |3       |2021-01-21    |
|1   |3       |2021-01-22    |
|1   |4       |2021-02-22    |
|1   |5       |2021-03-22    |

Expected result:

|x   |date          |group1   |group2   |group3   |group4   |group5   |
|1   |2021-01-01    |TRUE     |FALSE    |FALSE    |FALSE    |FALSE    |
|1   |2021-01-02    |TRUE     |FALSE    |FALSE    |FALSE    |FALSE    |
|1   |2021-01-03    |TRUE     |FALSE    |FALSE    |FALSE    |FALSE    |
|1   |2021-01-10    |FALSE    |TRUE     |FALSE    |FALSE    |FALSE    |
|1   |2021-01-11    |FALSE    |TRUE     |FALSE    |FALSE    |FALSE    |
|1   |2021-01-20    |FALSE    |FALSE    |TRUE     |FALSE    |FALSE    |
|1   |2021-01-21    |FALSE    |FALSE    |TRUE     |FALSE    |FALSE    |
|1   |2021-01-22    |FALSE    |FALSE    |TRUE     |FALSE    |FALSE    |
|1   |2021-02-22    |FALSE    |FALSE    |FALSE    |TRUE     |FALSE    |
|1   |2021-03-22    |FALSE    |FALSE    |FALSE    |FALSE    |TRUE     |

I've tried pivot_wider, but I don't know where the values should come from. I've also tried the following code:

# Attempt: add five indicator columns, then flag the matching one for each row.
df = df %>%
  mutate(group1= 0,
         group2= 0,
         group3= 0,
         group4= 0,
         group5= 0) %>%
  # NOTE(review): `for` is a control-flow construct, not a function; piping
  # into it is what produces the "arguments passed to 'for'" error quoted below.
  for (i in 1:nrow(df)){
  # NOTE(review): `i` is never used to pick a row, and the assignments below
  # create plain variables rather than updating columns of `df`.
  if(group == 1){group1 = TRUE}
  else if(group == 2){group2 = TRUE}
  else if(group == 3){group3 = TRUE}
  # NOTE(review): repeats `group == 3`, so this branch can never be reached;
  # presumably `group == 4` was intended.
  else if(group == 3){group4 = TRUE}
  else {group5 = TRUE}
  }

And the error is: Error in for (. in i) 1:nrow(df) :4 arguments passed to 'for' which requires 3

xshbj
  • 83
  • 6

3 Answers

1

After converting `group` to a factor, we could do:

1. tidyverse way:

library(purrr)
library(tidyr)
library(dplyr)

# One-hot encode `group`: for every row build a named logical vector with one
# entry per factor level, then spread that vector into separate columns.
df %>% 
    mutate(group = as.factor(group),
           # Prefix the element names with "group_" so that unnest_wider()
           # creates columns group_1 ... group_5 rather than columns named
           # after the bare level labels "1" ... "5".
           group_X = map(group, ~set_names(levels(group) == .x,
                                            paste0("group_", levels(group))))) %>% 
    unnest_wider(group_X)
      x group date       group_1 group_2 group_3 group_4 group_5
   <int> <fct> <chr>      <lgl>   <lgl>   <lgl>   <lgl>   <lgl>  
 1     1 1     2021-01-01 TRUE    FALSE   FALSE   FALSE   FALSE  
 2     1 1     2021-01-02 TRUE    FALSE   FALSE   FALSE   FALSE  
 3     1 1     2021-01-03 TRUE    FALSE   FALSE   FALSE   FALSE  
 4     1 2     2021-01-10 FALSE   TRUE    FALSE   FALSE   FALSE  
 5     1 2     2021-01-11 FALSE   TRUE    FALSE   FALSE   FALSE  
 6     1 3     2021-01-20 FALSE   FALSE   TRUE    FALSE   FALSE  
 7     1 3     2021-01-21 FALSE   FALSE   TRUE    FALSE   FALSE  
 8     1 3     2021-01-22 FALSE   FALSE   TRUE    FALSE   FALSE  
 9     1 4     2021-02-22 FALSE   FALSE   FALSE   TRUE    FALSE  
10     1 5     2021-03-22 FALSE   FALSE   FALSE   FALSE   TRUE   

2. sapply:

library(dplyr)
df$group <- as.factor(df$group)
# Compare `group` against each factor level. The "group_<level>" labels are
# attached as names only, so they become the column names of the result;
# comparing the labels themselves against `group` would give FALSE everywhere.
df %>%
  cbind(sapply(setNames(levels(.$group), paste0("group_", levels(.$group))),
               `==`, .$group))
   x group       date group_1 group_2 group_3 group_4 group_5
1  1     1 2021-01-01    TRUE   FALSE   FALSE   FALSE   FALSE
2  1     1 2021-01-02    TRUE   FALSE   FALSE   FALSE   FALSE
3  1     1 2021-01-03    TRUE   FALSE   FALSE   FALSE   FALSE
4  1     2 2021-01-10   FALSE    TRUE   FALSE   FALSE   FALSE
5  1     2 2021-01-11   FALSE    TRUE   FALSE   FALSE   FALSE
6  1     3 2021-01-20   FALSE   FALSE    TRUE   FALSE   FALSE
7  1     3 2021-01-21   FALSE   FALSE    TRUE   FALSE   FALSE
8  1     3 2021-01-22   FALSE   FALSE    TRUE   FALSE   FALSE
9  1     4 2021-02-22   FALSE   FALSE   FALSE    TRUE   FALSE
10 1     5 2021-03-22   FALSE   FALSE   FALSE   FALSE    TRUE
TarJae
  • 72,363
  • 6
  • 19
  • 66
0
library(tidyverse)
library(lubridate)

# Example data: 20 rows, each with a random group in 1..5 and a random date
# drawn (with replacement) from 2021-01-01 through 2021-02-01.
df <- tibble(
  x = 1,
  group = sample(1:5, 20, replace = TRUE),
  # Step by calendar day. A lubridate Duration such as ddays(1) is stored as
  # 86400 seconds, which seq() reads as a step of 86400 days, collapsing the
  # sequence to the single date 2021-01-01.
  date = sample(seq(ymd("20210101"), ymd("20210201"), by = "day"),
                20, replace = TRUE)
)

# One logical indicator column per group value.
df %>% mutate(
  group1 = group == 1,
  group2 = group == 2,
  group3 = group == 3,
  group4 = group == 4,
  group5 = group == 5,
)

Output

# A tibble: 20 x 8
       x group date       group1 group2 group3 group4 group5
   <dbl> <int> <date>     <lgl>  <lgl>  <lgl>  <lgl>  <lgl> 
 1     1     1 2021-01-01 TRUE   FALSE  FALSE  FALSE  FALSE 
 2     1     3 2021-01-01 FALSE  FALSE  TRUE   FALSE  FALSE 
 3     1     5 2021-01-01 FALSE  FALSE  FALSE  FALSE  TRUE  
 4     1     1 2021-01-01 TRUE   FALSE  FALSE  FALSE  FALSE 
 5     1     3 2021-01-01 FALSE  FALSE  TRUE   FALSE  FALSE 
 6     1     4 2021-01-01 FALSE  FALSE  FALSE  TRUE   FALSE 
 7     1     4 2021-01-01 FALSE  FALSE  FALSE  TRUE   FALSE 
 8     1     5 2021-01-01 FALSE  FALSE  FALSE  FALSE  TRUE  
 9     1     4 2021-01-01 FALSE  FALSE  FALSE  TRUE   FALSE 
10     1     5 2021-01-01 FALSE  FALSE  FALSE  FALSE  TRUE  
11     1     2 2021-01-01 FALSE  TRUE   FALSE  FALSE  FALSE 
12     1     1 2021-01-01 TRUE   FALSE  FALSE  FALSE  FALSE 
13     1     5 2021-01-01 FALSE  FALSE  FALSE  FALSE  TRUE  
14     1     1 2021-01-01 TRUE   FALSE  FALSE  FALSE  FALSE 
15     1     1 2021-01-01 TRUE   FALSE  FALSE  FALSE  FALSE 
16     1     3 2021-01-01 FALSE  FALSE  TRUE   FALSE  FALSE 
17     1     4 2021-01-01 FALSE  FALSE  FALSE  TRUE   FALSE 
18     1     5 2021-01-01 FALSE  FALSE  FALSE  FALSE  TRUE  
19     1     2 2021-01-01 FALSE  TRUE   FALSE  FALSE  FALSE 
20     1     5 2021-01-01 FALSE  FALSE  FALSE  FALSE  TRUE  
Marek Fiołka
  • 4,825
  • 1
  • 5
  • 20
0

We can use pivot_wider from tidyr to get the data in wide format. With values_fn we return TRUE for every group value that is present, and with the values_fill argument the remaining cells are filled with FALSE.

# Spread `group` into one logical column per group value: `values_fn` maps
# every group value that is present to TRUE, and `values_fill` puts FALSE in
# the cells the reshape leaves empty.
tidyr::pivot_wider(
  df,
  names_from = group,
  names_prefix = 'group',
  values_from = group,
  values_fn = function(x) TRUE,
  values_fill = FALSE
)

#       x date       group1 group2 group3 group4 group5
#   <int> <chr>      <lgl>  <lgl>  <lgl>  <lgl>  <lgl> 
# 1     1 2021-01-01 TRUE   FALSE  FALSE  FALSE  FALSE 
# 2     1 2021-01-02 TRUE   FALSE  FALSE  FALSE  FALSE 
# 3     1 2021-01-03 TRUE   FALSE  FALSE  FALSE  FALSE 
# 4     1 2021-01-10 FALSE  TRUE   FALSE  FALSE  FALSE 
# 5     1 2021-01-11 FALSE  TRUE   FALSE  FALSE  FALSE 
# 6     1 2021-01-20 FALSE  FALSE  TRUE   FALSE  FALSE 
# 7     1 2021-01-21 FALSE  FALSE  TRUE   FALSE  FALSE 
# 8     1 2021-01-22 FALSE  FALSE  TRUE   FALSE  FALSE 
# 9     1 2021-02-22 FALSE  FALSE  FALSE  TRUE   FALSE 
#10     1 2021-03-22 FALSE  FALSE  FALSE  FALSE  TRUE  

data

It is easier to help if you provide data in a reproducible format

# Reproducible copy of the example data: ten rows with x = 1, an integer
# group id in 1..5, and the date kept as a character string.
df <- data.frame(
  x = rep(1L, 10),
  group = rep(1:5, times = c(3L, 2L, 3L, 1L, 1L)),
  date = c(
    "2021-01-01", "2021-01-02", "2021-01-03",
    "2021-01-10", "2021-01-11",
    "2021-01-20", "2021-01-21", "2021-01-22",
    "2021-02-22",
    "2021-03-22"
  ),
  stringsAsFactors = FALSE
)
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213