4

I'm trying to pivot multiple values at a time.

I have this:

>  head(data)
# A tibble: 6 x 3
  variable       `Mean (SD)`    `Median (IQR)`
  <chr>          <glue>         <glue>        
1 VarA_VVV_Cond1 268.59 (80.6)  276 (86)      
2 VarA_WWW_Cond1 149.07 (39.79) 155 (40.5)    
3 VarA_XXX_Cond1 147.71 (39.65) 155 (41)      
4 VarA_YYY_Cond1 18.85 (10.76)  18 (15.5)     
5 VarA_ZZZ_Cond1 20.98 (11.34)  20 (14)       
6 VarA_VVV_Cond2 228.49 (83.77) 241 (116)  

The desired output is this:

          VVV(Mean/SD) VVV(Median/IQR) XXX(Mean/SD) XXX(Median/IQR)... 
VAR_A_Cond_1
VAR_A_Cond_2
VAR_B_Cond_1
VAR_B_Cond_2

This is my dataset:

>  dput(data)
structure(list(variable = c("VarA_VVV_Cond1", "VarA_WWW_Cond1", 
"VarA_XXX_Cond1", "VarA_YYY_Cond1", "VarA_ZZZ_Cond1", "VarA_VVV_Cond2", 
"VarA_WWW_Cond2", "VarA_XXX_Cond2", "VarA_YYY_Cond2", "VarA_ZZZ_Cond2", 
"VarB_VVV_Cond1", "VarB_WWW_Cond1", "VarB_XXX_Cond1", "VarB_YYY_Cond1", 
"VarB_ZZZ_Cond1", "VarB_VVV_Cond2", "VarB_WWW_Cond2", "VarB_XXX_Cond2", 
"VarB_YYY_Cond2", "VarB_ZZZ_Cond2"), `Mean (SD)` = structure(c("268.59 (80.6)", 
"149.07 (39.79)", "147.71 (39.65)", "18.85 (10.76)", "20.98 (11.34)", 
"228.49 (83.77)", "113.66 (35.91)", "112.64 (35.75)", "24.07 (15.79)", 
"26.36 (16.51)", "250.72 (61.53)", "140.71 (30.52)", "138.93 (30.37)", 
"21.02 (10.46)", "22.72 (11.05)", "225.98 (81.32)", "112.43 (36.09)", 
"111.1 (36.41)", "24.71 (16.77)", "26.59 (17.49)"), class = c("glue", 
"character")), `Median (IQR)` = structure(c("276 (86)", "155 (40.5)", 
"155 (41)", "18 (15.5)", "20 (14)", "241 (116)", "116 (51)", 
"116 (48)", "23 (21.5)", "24 (22.5)", "259 (60)", "142 (36)", 
"142 (34)", "21 (15)", "21 (15)", "244.5 (93.5)", "107.5 (51.5)", 
"107 (50.75)", "24 (20.75)", "24.5 (21.75)"), class = c("glue", 
"character"))), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

I've tried many things, but nothing had solved the issue:

Like this:

data1 <- data %>% 
   tidyr::pivot_wider(.,
                      names_from = "variable",
                      values_from = c("Mean (SD)", "Median (IQR)")) %>% 
   pivot_longer(cols = 1:40,
   names_to = c("Names"),
   values_to = c("Mean_SD", "Median_IQR"))

Any thoughts??

Thanks in advance!

Additional info: I have 20 rows/obs in the original "data". Each is called: VarA_VVV_Cond1, which means that I have 2 variables: (A and B), 5 tests (VVV, WWW, XXX, YYY, ZZZ) and 2 conditions (Cond1, Cond2).

Given that, I also have the tests Mean (SD) and Median (IQR). That's the idea.

Obs: I've seen many posts here concerning pivoting, but none seems to account for this (like this, for example...I'd really appreciate some help!

marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
Larissa Cury
  • 806
  • 2
  • 11
  • The original ```Mean (SD)``` and ```Median (IQR)``` values :) So that I'd have ten colluns (one for each test), like this: ````VVV(Mean/SD), VVV(Median/IQR), ....." – Larissa Cury Sep 26 '22 at 19:11

3 Answers3

4

We may need to separate the column 'variable' before we do the pivoting to 'wide'

library(dplyr)
library(tidyr)
library(stringr)
data %>%
   mutate(variable = str_replace(variable, "^(\\w+)_(\\w+)_(\\w+)",
        "\\1_\\3,\\2")) %>% 
   separate(variable, into = c("variable", "newcol"), sep = ",") %>% 
   pivot_wider(names_from = newcol, values_from = c(`Mean (SD)`,
      `Median (IQR)`), names_glue = "{newcol}({.value})")%>% 
    rename_with(~ str_remove(str_replace(.x, "\\s+\\(", "/"), "\\)"), -variable)

-output

# A tibble: 4 × 11
  variable   `VVV(Mean/SD)` `WWW(Mean/SD)` `XXX(Mean/SD)` `YYY(Mean/SD)` `ZZZ(Mean/SD)` `VVV(Median/IQR)` `WWW(Median/IQR)` `XXX(Median/IQR)` `YYY(Median/IQR)`
  <chr>      <glue>         <glue>         <glue>         <glue>         <glue>         <glue>            <glue>            <glue>            <glue>           
1 VarA_Cond1 268.59 (80.6)  149.07 (39.79) 147.71 (39.65) 18.85 (10.76)  20.98 (11.34)  276 (86)          155 (40.5)        155 (41)          18 (15.5)        
2 VarA_Cond2 228.49 (83.77) 113.66 (35.91) 112.64 (35.75) 24.07 (15.79)  26.36 (16.51)  241 (116)         116 (51)          116 (48)          23 (21.5)        
3 VarB_Cond1 250.72 (61.53) 140.71 (30.52) 138.93 (30.37) 21.02 (10.46)  22.72 (11.05)  259 (60)          142 (36)          142 (34)          21 (15)          
4 VarB_Cond2 225.98 (81.32) 112.43 (36.09) 111.1 (36.41)  24.71 (16.77)  26.59 (17.49)  244.5 (93.5)      107.5 (51.5)      107 (50.75)       24 (20.75)       
# … with 1 more variable: `ZZZ(Median/IQR)` <glue>
akrun
  • 874,273
  • 37
  • 540
  • 662
  • OMG, I can't believe that the solution was THAT simple!!! Thank you, SO MUCH! If you don't mind, would you explain this regex part ```"^(\\w+)_(\\w+)_(\\w+)", "\\1_\\3,\\2")) ``` ? – Larissa Cury Sep 26 '22 at 19:15
  • @LarissaCury The `(..)` syntax is for capturing some characters. `\\w+` - matches the word from the start (`^`) of the string, and is captured, then matches the `_`, folllowed by capturing the second and third word as described earlier. Then, we just rearrange the backreferences of the captured group so that we can split by a unique separator `,`. The `rename_with` is also done so as to match your expected column name pattern – akrun Sep 26 '22 at 19:19
  • This is something (regex) I'll have to study more, thank you! What If I wanted to put a space between ```ZZZ(Median/IQR)``` like ```ZZZ(Median/IQR)``` ? I've tried " " , \s, but it didn't work... @akrun – Larissa Cury Sep 26 '22 at 19:38
  • @LarissaCury you can use `.. %>% pivot_wider(names_from = newcol, values_from = c(`Mean (SD)`, `Median (IQR)`), names_glue = "{newcol} ({.value})") %>% rename_with(~ str_replace(.x, "\\s*\\((\\w+)\\)", "/\\1"), -variable)` – akrun Sep 26 '22 at 19:43
  • thank you! it worked! do you recommend any introduction to regex? – Larissa Cury Sep 26 '22 at 23:04
  • @LarissaCury you could check [here](https://www.regular-expressions.info/tutorial.html) or [here](https://regexlearn.com/) – akrun Sep 27 '22 at 15:00
  • 1
    thanks for the recomendations! – Larissa Cury Sep 27 '22 at 15:00
4

Something like this:

library(tidyverse)

df %>% 
  separate(variable, into = c("Var", "XXX", "Cond"), sep = "_") %>% 
  pivot_wider(names_from = XXX,
              values_from = c(`Mean (SD)`,`Median (IQR)`)) %>% 
  mutate(x = paste(Var, Cond, sep = "_"), .keep="unused", .before=1) %>% 
  column_to_rownames("x")
    Mean (SD)_VVV  Mean (SD)_WWW  Mean (SD)_XXX Mean (SD)_YYY Mean (SD)_ZZZ Median (IQR)_VVV Median (IQR)_WWW Median (IQR)_XXX Median (IQR)_YYY Median (IQR)_ZZZ
VarA_Cond1  268.59 (80.6) 149.07 (39.79) 147.71 (39.65) 18.85 (10.76) 20.98 (11.34)         276 (86)       155 (40.5)         155 (41)        18 (15.5)          20 (14)
VarA_Cond2 228.49 (83.77) 113.66 (35.91) 112.64 (35.75) 24.07 (15.79) 26.36 (16.51)        241 (116)         116 (51)         116 (48)        23 (21.5)        24 (22.5)
VarB_Cond1 250.72 (61.53) 140.71 (30.52) 138.93 (30.37) 21.02 (10.46) 22.72 (11.05)         259 (60)         142 (36)         142 (34)          21 (15)          21 (15)
VarB_Cond2 225.98 (81.32) 112.43 (36.09)  111.1 (36.41) 24.71 (16.77) 26.59 (17.49)     244.5 (93.5)     107.5 (51.5)      107 (50.75)       24 (20.75)     24.5 (21.75)
> 
TarJae
  • 72,363
  • 6
  • 19
  • 66
3

This works too:

df[c('Var', 'Group', 'Cond')] <- str_split_fixed(df$variable, "_", n = Inf)

df %>%
  pivot_wider(id_cols = c(Var, Cond), 
              values_from = c(`Mean (SD)`, `Median (IQR)`), 
              names_from = Group)


Var   Cond  `Mean (SD)_VVV` `Mean (SD)_WWW` `Mean (SD)_XXX` `Mean (SD)_YYY` `Mean (SD)_ZZZ` `Median (IQR)_VVV`
  <chr> <chr> <glue>          <glue>          <glue>          <glue>          <glue>          <glue>            
1 VarA  Cond1 268.59 (80.6)   149.07 (39.79)  147.71 (39.65)  18.85 (10.76)   20.98 (11.34)   276 (86)          
2 VarA  Cond2 228.49 (83.77)  113.66 (35.91)  112.64 (35.75)  24.07 (15.79)   26.36 (16.51)   241 (116)         
3 VarB  Cond1 250.72 (61.53)  140.71 (30.52)  138.93 (30.37)  21.02 (10.46)   22.72 (11.05)   259 (60)          
4 VarB  Cond2 225.98 (81.32)  112.43 (36.09)  111.1 (36.41)   24.71 (16.77)   26.59 (17.49)   244.5 (93.5) 
Lucca Nielsen
  • 1,497
  • 3
  • 16