3

I'm currently trying to transpose (wide to long) multiple variables by using pivot_longer function in R.

Here's what I have

primary_id = c("A1", "A2")
id1 = c("B1", "B2")
score1 = c("good", "bad")
id2 = c("C1", "C2")
score2 = c(NA, "neutral")
id3 = c("D1", "D2")
score3 = c("bad", "neutral")
id4 = c("E1", "E2")
score4 = c(NA, "good")
have = data.frame(primary_id, id1, score1, id2, score2, id3, score3, id4, score4)

have
  primary_id id1 score1 id2  score2 id3  score3 id4 score4
1         A1  B1   good  C1    <NA>  D1     bad  E1   <NA>
2         A2  B2    bad  C2 neutral  D2 neutral  E2   good

Please note that I have 20 id variables and 20 score variables (example only shows 4)

And here's what I'm seeking

  primary_id ids  scores
1         A1  B1    good
2         A1  C1    <NA>
3         A1  D1     bad
4         A1  E1    <NA>
5         A2  B2     bad
6         A2  C2 neutral
7         A2  D2 neutral
8         A2  E2    good

Any advice/help would be much appreciated! Please note that score variable(s) have NAs here and there.

Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
Brian
  • 161
  • 11

3 Answers3

5

in base R:

reshape(have,-1, dir='long', sep='', idvar = 'primary_id')

     primary_id time id   score
A1.1         A1    1 B1    good
A2.1         A2    1 B2     bad
A1.2         A1    2 C1    <NA>
A2.2         A2    2 C2 neutral
A1.3         A1    3 D1     bad
A2.3         A2    3 D2 neutral
A1.4         A1    4 E1    <NA>
A2.4         A2    4 E2    good

in data.table:

library(data.table)
melt(setDT(have),1, patterns(id = '^id', score = '^score'))
   primary_id variable id   score
1:         A1        1 B1    good
2:         A2        1 B2     bad
3:         A1        2 C1    <NA>
4:         A2        2 C2 neutral
5:         A1        3 D1     bad
6:         A2        3 D2 neutral
7:         A1        4 E1    <NA>
8:         A2        4 E2    good

in tidyverse

library(tidyverse)
pivot_longer(have, -1, names_to = '.value', names_pattern = '(\\D+)')

# A tibble: 8 × 3
  primary_id id    score  
  <chr>      <chr> <chr>  
1 A1         B1    good   
2 A1         C1    NA     
3 A1         D1    bad    
4 A1         E1    NA     
5 A2         B2    bad    
6 A2         C2    neutral
7 A2         D2    neutral
8 A2         E2    good   
Onyambu
  • 67,392
  • 3
  • 24
  • 53
3

You could set ".value" in names_to and supply one of names_sep or names_pattern to specify how the column names should be split.

library(tidyr)

have %>%
  pivot_longer(
    -primary_id,
    names_to = c(".value", NA),
    names_sep = "(?=\\d)" # or names_pattern = "(.+)(.)"
  )

# A tibble: 8 × 3
  primary_id id    score
  <chr>      <chr> <chr>
1 A1         B1    good
2 A1         C1    NA
3 A1         D1    bad
4 A1         E1    NA     
5 A2         B2    bad
6 A2         C2    neutral
7 A2         D2    neutral
8 A2         E2    good
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
1

Here is a solution similar to @Darren Tsai one, using an intermediate step in preparing the names for names_sep with rename_with:

library(dplyr)

have %>%
  rename_with(., ~sub("(.*)(\\d+)$", "\\1_\\2", .)) %>% 
  pivot_longer(
    -primary_id,
    names_to = c(".value", NA),
    names_sep = "_"
  )
 primary_id id    score  
  <chr>      <chr> <chr>  
1 A1         B1    good   
2 A1         C1    NA     
3 A1         D1    bad    
4 A1         E1    NA     
5 A2         B2    bad    
6 A2         C2    neutral
7 A2         D2    neutral
8 A2         E2    good  
TarJae
  • 72,363
  • 6
  • 19
  • 66