0

I have a dataframe with the columns as follows:

Patient   Gene
1         A
1         B
2         A
2         B
2         C
3         A
3         C

I would like to pivot this to wide format, so that each patient is a row and each gene is a column (1 if the patient has the gene, 0 otherwise), as in:

          GeneA   GeneB   GeneC
Patient1      1       1       0
Patient2      1       1       1 
Patient3      1       0       1
RoyBatty
  • 306
  • 1
  • 7

2 Answers

1

With pivot_wider:

library(tidyr)
library(dplyr)

# Build a patient-by-gene 0/1 indicator table from the long `df`
# (one row per Patient/Gene observation).
df %>%
  # Every row present in the data marks that gene as present for the patient.
  mutate(value = 1) %>%
  pivot_wider(
    names_from = Gene,
    # Name the value column explicitly instead of relying on the default.
    values_from = value,
    # Collapse repeated Patient/Gene rows to a single 1; without this,
    # duplicates would produce list-columns and a warning.
    values_fn = max,
    # Genes a patient does not have become 0 rather than NA.
    values_fill = 0,
    # Output columns are named GeneA, GeneB, ...
    names_prefix = "Gene"
  )

output

# A tibble: 3 × 4
  Patient GeneA GeneB GeneC
    <int> <dbl> <dbl> <dbl>
1       1     1     1     0
2       2     1     1     1
3       3     1     0     1

Another option with fastDummies::dummy_cols:

library(fastDummies)

# Alternative: expand Gene into one indicator column per gene value, then
# collapse to a single row per patient.
df %>%
  # Replace the Gene column with its dummy (0/1) columns.
  dummy_cols(select_columns = "Gene", remove_selected_columns = TRUE) %>%
  group_by(Patient) %>%
  # Within a patient, max() of an indicator is 1 if any row had that gene.
  summarise(across(.cols = everything(), .fns = max))
Maël
  • 45,206
  • 3
  • 29
  • 67
1
library(data.table)

# Example data: one row per (Patient, Gene) observation.
df <- data.frame(
  stringsAsFactors = FALSE,
  Patient = c(1L, 1L, 2L, 2L, 2L, 3L, 3L),
  Gene = c("A", "B", "A", "B", "C", "A", "C")
)
df
#>   Patient Gene
#> 1       1    A
#> 2       1    B
#> 3       2    A
#> 4       2    B
#> 5       2    C
#> 6       3    A
#> 7       3    C

# Convert df to a data.table in place.
setDT(df)

# Cast to wide format: one row per patient, one 0/1 column per gene.
wide <- dcast(
  data = df,
  # paste0() gives the requested column names GeneA, GeneB, ... (no space).
  formula = Patient ~ paste0("Gene", Gene),
  # Stated explicitly so dcast() does not have to guess the value column.
  value.var = "Gene",
  # Presence flag: 1 if the patient has at least one non-missing row for the
  # gene, 0 otherwise (also used for empty cells). Unlike a row count, this
  # stays 0/1 when a Patient/Gene pair is repeated.
  fun.aggregate = function(x) as.integer(any(!is.na(x)))
)
wide
#>    Patient GeneA GeneB GeneC
#> 1:       1     1     1     0
#> 2:       2     1     1     1
#> 3:       3     1     0     1

Created on 2022-10-06 with reprex v2.0.2

Yuriy Saraykin
  • 8,390
  • 1
  • 7
  • 14