2

I have an extremely wide dataset that I am trying to unpivot to a degree but not completely. Essentially I am trying to group certain columns together based on a string before an underscore and pivot on those groups individually. My current method uses two opposite pivots, a for loop, and an intermediate list to accomplish my goal. I am able to get my final product but for my own knowledge, I am wondering if there is a more elegant solution. I realize that I am likely not explaining things well so I have recreated the scenario with a dummy dataset.

#Required packages
library(tidyverse)

#Dummy data
file <- as_tibble(data.frame(id = c("QQQ", "WWW", "EEE", "RRR", "TTT"),
                                  state = c("aa", "bb", "cc", "dd", "ee"),
                                  city = c("ff", "gg", "hh", "ii", "jj"),
                       a_1 = runif(5),
                       a_2 = runif(5),
                       a_3 = runif(5),
                       a_4 = runif(5),
                       a_5 = runif(5),
                       a_6 = runif(5),
                       a_7 = runif(5),
                       a_8 = runif(5),
                       a_9 = runif(5),
                       a_10 = runif(5),
                       b_1 = runif(5),
                       b_2 = runif(5),
                       b_3 = runif(5),
                       b_4 = runif(5),
                       b_5 = runif(5),
                       b_6 = runif(5),
                       b_7 = runif(5),
                       b_8 = runif(5),
                       b_9 = runif(5),
                       b_10 = runif(5),
                       c_1 = runif(5),
                       c_2 = runif(5),
                       c_3 = runif(5),
                       c_4 = runif(5),
                       c_5 = runif(5),
                       c_6 = runif(5),
                       c_7 = runif(5),
                       c_8 = runif(5),
                       c_9 = runif(5),
                       c_10 = runif(5)))

#My solution
longer <- file %>%
  pivot_longer(cols = c(-id:-city),
             names_to = c(".value", "section"),
             names_pattern = "(.+)_([0-9]+$)"
) 


num_letterGroup <- ncol(longer) - 4 #4 is the number of columns i want to retain
wide_list <- vector(mode = "list", length = num_letterGroup)
name_list <- vector(mode = "character", length = num_letterGroup)

for (i in 1:num_letterGroup) {
  col_num <- 4 + i
  col_name <- colnames(longer)[col_num]
  wide <- longer %>%
    select(1:4, all_of(col_name)) %>%
    pivot_wider(names_from = section, values_from = col_name) %>%
    mutate(letterGroup = col_name)
  wide_list[[i]] <- wide
  name_list[i] <- col_name
}

names(wide_list) <- name_list

wide_df <- bind_rows(wide_list)

I realize that the amount of data given might seem excessive but I needed the column numbers to be sequential as well as reach double digits. Thank you in advance for any assistance you can provide.

EDIT TO CLARIFY: wide_df is the final product that I want

2 Answers2

3

EDIT

This is actually much simpler than the original answer. (Thanks to @thelatemail)

library(tidyr)
pivot_longer(file, 
             cols = -c(id:city), 
             names_to = c('letterGroup', '.value'), 
             names_sep = '_')


# A tibble: 15 x 14
#   id    state city  letterGroup   `1`     `2`    `3`    `4`    `5`    `6`   `7`      `8`    `9`  `10`
#   <chr> <chr> <chr> <chr>       <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>    <dbl>  <dbl> <dbl>
# 1 QQQ   aa    ff    a           0.894 0.534   0.583  0.327  0.497  0.254  0.877 0.236    0.585  0.436
# 2 QQQ   aa    ff    b           0.861 0.897   0.244  0.292  0.818  0.428  0.732 0.322    0.702  0.158
# 3 QQQ   aa    ff    c           0.371 0.842   0.918  0.615  0.346  0.675  0.821 0.718    0.461  0.374
# 4 WWW   bb    gg    a           0.573 0.00886 0.555  0.810  0.480  0.763  0.624 0.0667   0.705  0.872
# 5 WWW   bb    gg    b           0.994 0.652   0.961  0.825  0.398  0.0138 0.560 0.695    0.0171 0.704
# 6 WWW   bb    gg    c           0.113 0.988   0.663  0.0461 0.335  0.478  0.291 0.338    0.386  0.183
# 7 EEE   cc    hh    a           0.482 0.197   0.630  0.442  0.633  0.932  0.317 0.119    0.872  0.678
# 8 EEE   cc    hh    b           0.834 0.378   0.504  0.911  0.644  0.976  0.777 0.485    0.470  0.560
# 9 EEE   cc    hh    c           0.819 0.240   0.683  0.570  0.969  0.956  0.745 0.790    0.0548 0.314
#10 RRR   dd    ii    a           0.887 0.818   0.0266 0.444  0.554  0.817  0.332 0.0801   0.966  0.252
#11 RRR   dd    ii    b           0.416 0.211   0.931  0.105  0.948  0.555  0.201 0.656    0.794  0.526
#12 RRR   dd    ii    c           0.652 0.897   0.741  0.254  0.815  0.154  0.422 0.361    0.925  0.696
#13 TTT   ee    jj    a           0.391 0.626   0.358  0.296  0.804  0.743  0.655 0.000308 0.257  0.415
#14 TTT   ee    jj    b           0.764 0.686   0.0174 0.460  0.0164 0.0718 0.700 0.558    0.341  0.411
#15 TTT   ee    jj    c           0.812 0.995   0.845  0.513  0.987  0.249  0.429 0.749    0.557  0.369

Original Answer

You can get data completely in long format (no need for intermediate columns), separate the column names in two different columns and get the data in wide format.

file %>%
  pivot_longer(cols = -c(id:city)) %>%
  separate(name, into = c('letterGroup', 'col'), sep = "_") %>%
  pivot_wider(names_from = col, values_from = value)
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
0

You can try this:

library(tidyr)
df1 <- pivot_longer(file,cols = names(file)[-c(1:3)]) %>%
  separate(name,into = c('letter','number'),sep = '_') %>%
  pivot_wider(names_from = number,values_from = value,id_cols = c() )
#Reshape
df2 <- reshape(as.data.frame(df1),idvar = c('id','state','city','letter'),timevar = 'number',direction = 'wide')
names(df2) <- gsub('value.','',names(df2))
Duck
  • 39,058
  • 13
  • 42
  • 84