2

How do you pivot the first dataset to look like the 2nd?

I'm trying to use the dplyr pivot_longer function without any luck. Below is a sample data set that mirrors my much larger dataset with 50 plus columns.

https://tidyr.tidyverse.org/reference/pivot_longer.html


library(tidyverse)

df1
#> # A tibble: 9 x 6
#>   column_label  val1  val2  cat1  cat2 cat3             
#>          <dbl> <dbl> <dbl> <dbl> <dbl> <chr>            
#> 1            2 0.989  9.89     0    NA <NA>             
#> 2            2 0.622  6.22     1    NA <NA>             
#> 3            3 0.619  6.19    NA     0 <NA>             
#> 4            3 0.119  1.19    NA     1 <NA>             
#> 5           10 0.407  4.07    NA    NA BABY BOOMERS     
#> 6           10 0.800  8.00    NA    NA GEN Z            
#> 7           10 0.305  3.05    NA    NA GENERATION X     
#> 8           10 0.158  1.58    NA    NA MILLENNIALS      
#> 9           10 0.439  4.39    NA    NA SILENT GENERATION

# how do you pivot_longer to create this data set?

df2
#> # A tibble: 9 x 5
#>   column_label  val1  val2 variables Values           
#>          <dbl> <dbl> <dbl> <chr>     <chr>            
#> 1            2 0.989  9.89 cat1      0                
#> 2            2 0.622  6.22 cat1      1                
#> 3            3 0.619  6.19 cat2      0                
#> 4            3 0.119  1.19 cat2      1                
#> 5           10 0.407  4.07 cat3      BABY BOOMERS     
#> 6           10 0.800  8.00 cat3      GEN Z            
#> 7           10 0.305  3.05 cat3      GENERATION X     
#> 8           10 0.158  1.58 cat3      MILLENNIALS      
#> 9           10 0.439  4.39 cat3      SILENT GENERATION

Data

df1 <- structure(list(column_label = c(2, 2, 3, 3, 10, 10, 10, 10, 10
), val1 = c(0.989049526, 0.622384581, 0.618576065, 0.11864823, 
            0.406763475, 0.799564365, 0.3053153, 0.158456912, 0.438528606
), val2 = c(9.890495264, 6.223845807, 6.185760647, 1.186482297, 
            4.067634747, 7.995643651, 3.053153001, 1.584569123, 4.385286057
), cat1 = c(0, 1, NA, NA, NA, NA, NA, NA, NA), cat2 = c(NA, NA, 
                                                        0, 1, NA, NA, NA, NA, NA), cat3 = c(NA, NA, NA, NA, "BABY BOOMERS", 
                                                                                            "GEN Z", "GENERATION X", "MILLENNIALS", "SILENT GENERATION")), class = c("spec_tbl_df", 
                                                                                                                                                                     "tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
                                                                                                                                                                       cols = list(column_label = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                              "collector")), val1 = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                                                                "collector")), val2 = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                                                                                                                  "collector")), cat1 = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                                                                                                                                                                    "collector")), cat2 = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                                                                                                                                                                                                                      "collector")), cat3 = structure(list(), class = c("collector_character", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        "collector"))), default = structure(list(), class = c("collector_guess", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              "collector")), skip = 1), class = "col_spec"))

df2 <- structure(list(column_label = c(2, 2, 3, 3, 10, 10, 10, 10, 10
), val1 = c(0.989049526, 0.622384581, 0.618576065, 0.11864823, 
            0.406763475, 0.799564365, 0.3053153, 0.158456912, 0.438528606
), val2 = c(9.890495264, 6.223845807, 6.185760647, 1.186482297, 
            4.067634747, 7.995643651, 3.053153001, 1.584569123, 4.385286057
), variables = c("cat1", "cat1", "cat2", "cat2", "cat3", "cat3", 
                 "cat3", "cat3", "cat3"), Values = c("0", "1", "0", "1", "BABY BOOMERS", 
                                                     "GEN Z", "GENERATION X", "MILLENNIALS", "SILENT GENERATION")), class = c("spec_tbl_df", 
                                                                                                                              "tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
                                                                                                                                cols = list(column_label = structure(list(), class = c("collector_double", 
                                                                                                                                                                                       "collector")), val1 = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                         "collector")), val2 = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                                                                           "collector")), variables = structure(list(), class = c("collector_character", 
                                                                                                                                                                                                                                                                                                                                                  "collector")), Values = structure(list(), class = c("collector_character", 
                                                                                                                                                                                                                                                                                                                                                                                                      "collector"))), default = structure(list(), class = c("collector_guess", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                            "collector")), skip = 1), class = "col_spec"))

Created on 2020-03-13 by the reprex package (v0.3.0)

Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
Ryan John
  • 1,410
  • 1
  • 15
  • 23

1 Answers1

4

The columns you select in pivot_longer have no common type, i.e. cat1 and cat2 are numerics and cat3 is character. You can convert them all to characters in advance, or use the argument values_ptypes to specify the type.

df1 %>%
  pivot_longer(cat1:cat3,
               names_to = 'variables', values_to = 'Values',
               values_drop_na = TRUE,
               values_ptypes = list(Values = character()))

# # A tibble: 9 x 5
#   column_label  val1  val2 variables Values           
#          <dbl> <dbl> <dbl> <chr>     <chr>            
# 1            2 0.989  9.89 cat1      0                
# 2            2 0.622  6.22 cat1      1                
# 3            3 0.619  6.19 cat2      0                
# 4            3 0.119  1.19 cat2      1                
# 5           10 0.407  4.07 cat3      BABY BOOMERS     
# 6           10 0.800  8.00 cat3      GEN Z            
# 7           10 0.305  3.05 cat3      GENERATION X     
# 8           10 0.158  1.58 cat3      MILLENNIALS      
# 9           10 0.439  4.39 cat3      SILENT GENERATION
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51