0

I would like to move the remaining age and label columns to columns 3 and 4 while retaining data from the ID and region columns. Below I show an example. Of course, I have many more columns.

#I have:

 ID region age label age.1 label.1 age.2 label.2
1  1     AA  50     0    20       0    40       0
2  2     BB  60     1    40       1    NA      NA
3  3     CC  70     1    60       1    NA      NA
4  4     DD  50     1    NA      NA    NA      NA
5  5     EE  20     0    NA      NA    NA      NA

#I expected:

 ID region age label
1  1     AA  50     0
2  2     BB  60     1
3  3     CC  70     1
4  4     DD  50     1
5  5     EE  20     0
6  1     AA  20     0
7  2     BB  40     1
8  3     CC  60     1
9  1     AA  40     0

#My code:
data.frame(ID = 1:5, 
           region = c("AA", "BB", "CC","DD","EE"), 
           age = c(50,60,70,50,20), 
           label = c(0,1,1,1,0),
           age = c(20,40,60,NA,NA), 
           label = c(0,1,1,NA,NA),
           age = c(40,NA,NA,NA,NA), 
           label = c(0,NA,NA,NA,NA))


data.frame(ID = c((1:5),c(1:3),1), 
           region = c("AA", "BB", "CC","DD","EE","AA","BB","CC","AA"), 
           age = c(50,60,70,50,20,20,40,60,40), 
           label = c(0,1,1,1,0,0,1,1,0))
Zizou
  • 503
  • 5
  • 18

2 Answers2

0

one of many options: data.table::melt()

library( data.table )

melt( setDT(mydf), 
      id.vars = c("ID", "region"), 
      measure.vars = patterns( age = "^age", label = "^label"), 
      na.rm = TRUE )[, variable := NULL][]


#    ID region age label
# 1:  1     AA  50     0
# 2:  2     BB  60     1
# 3:  3     CC  70     1
# 4:  4     DD  50     1
# 5:  5     EE  20     0
# 6:  1     AA  20     0
# 7:  2     BB  40     1
# 8:  3     CC  60     1
# 9:  1     AA  40     0     
Wimpel
  • 26,031
  • 1
  • 20
  • 37
0

With tidyverse you can do this:

library(tidyverse)

df %>% 
  pivot_longer(cols = age:label.2, values_to = 'age') %>% 
  separate(name, into = c('a', 'label')) %>% 
  dplyr::filter(a != 'label') %>% 
  select(ID, region, age, label) %>% 
  mutate(label = ifelse(is.na(label), 0, label)) %>% 
  na.omit() 

#> # A tibble: 9 x 4
#>      ID region   age label
#>   <int> <fct>  <dbl> <chr>
#> 1     1 AA        50 0    
#> 2     1 AA        20 1    
#> 3     1 AA        40 2    
#> 4     2 BB        60 0    
#> 5     2 BB        40 1    
#> 6     3 CC        70 0    
#> 7     3 CC        60 1    
#> 8     4 DD        50 0    
#> 9     5 EE        20 0
MarBlo
  • 4,195
  • 1
  • 13
  • 27