2

My question is an expansion of the question posed here How to reshape data from long to wide format so I will phrase it in a similar way.

The difference is that I want to rearrange one long data table into a list of wide data tables.

dat <- data.table(
    sim = rep(c(1,2), each=4),
    time = rep(1:4, 2),
    value1 = rnorm(8),
    value2 = rnorm(8)
    )

dat
   sim  time      value1  value2
1    1     1      0.3407  0.5167
2    1     2     -0.7033  0.8416
3    1     3     -0.3795 -0.4717
4    1     4     -0.7460  0.8479
5    2     1      0.8981 -0.7163
6    2     2     -0.3347 -0.6849
7    2     3      0.5013  0.8941
8    2     4     -0.1745  0.0795

I want to reshape it so that I have a list of wide data tables named value1, value2 ... value99 etc...

l = list()


l[["value1"]]

    sim        1       2       3       4
1     1   0.3407 -0.7033 -0.3795 -0.7460
5     2  -0.8981 -0.3347 -0.5013 -0.1745

l[["value2"]]

    sim        1       2       3       4
1     1   0.5167  0.8416 -0.4717  0.8479
5     2  -0.7163 -0.6849  0.8941  0.0795

r2evans
  • 141,215
  • 6
  • 77
  • 149
orange90
  • 65
  • 5
  • The [tag:datatable] tag is a bit ambiguous; do you intend the [tag:data.table] (R's `data.table` package)? If not, then this tag is inappropriate for this question, preferring perhaps [tag:data.frame] for R's frames. – r2evans Apr 07 '21 at 14:01
  • 1
    I am using the data.table package, yes. I had dataframe in the code by mistake, thanks – orange90 Apr 07 '21 at 14:15
  • (You can see with the plethora of dplyr-based answers that that makes a big difference :-) – r2evans Apr 07 '21 at 14:20

5 Answers5

5

Two variants.

data.table

library(data.table)
tmp <- dcast(melt(as.data.table(dat), id = c("sim", "time")), sim + variable ~ time)
tmp <- split(tmp, tmp$variable)
tmp <- lapply(tmp, set, i = NULL, j = "variable", value = NULL)
tmp
# $value1
#      sim             1          2          3          4
#    <num>         <num>      <num>      <num>      <num>
# 1:     1  1.0458737762 -0.4845954  0.1891288 0.05100633
# 2:     2 -0.0002406689  1.8093820 -0.8253280 1.14547045
# $value2
#      sim           1          2           3         4
#    <num>       <num>      <num>       <num>     <num>
# 1:     1  0.03157319 -0.8352058 -0.06876365 0.7467717
# 2:     2 -0.42551873 -0.7720822  0.15276411 0.9885968

I often use magrittr::%>% with data.table as well, so that can be converted into

library(data.table)
library(magrittr) # if %>% is not already available
as.data.table(dat) %>%
  melt(., id = c("sim", "time")) %>%
  dcast(., sim + variable ~ time) %>%
  split(., .$variable) %>%
  lapply(., set, i = NULL, j = "variable", value = NULL)
# $value1
#      sim             1          2          3          4
#    <num>         <num>      <num>      <num>      <num>
# 1:     1  1.0458737762 -0.4845954  0.1891288 0.05100633
# 2:     2 -0.0002406689  1.8093820 -0.8253280 1.14547045
# $value2
#      sim           1          2           3         4
#    <num>       <num>      <num>       <num>     <num>
# 1:     1  0.03157319 -0.8352058 -0.06876365 0.7467717
# 2:     2 -0.42551873 -0.7720822  0.15276411 0.9885968

tidyverse

library(dplyr)
library(tidyr) # pivot_longer, pivot_wider
dat %>%
  pivot_longer(., -c(sim, time)) %>%
  pivot_wider(., names_from = time, values_from = value) %>%
  split(., .$name) %>%
  lapply(., select, -name)
# $value1
# # A tibble: 2 x 5
#     sim       `1`    `2`    `3`    `4`
#   <dbl>     <dbl>  <dbl>  <dbl>  <dbl>
# 1     1  1.05     -0.485  0.189 0.0510
# 2     2 -0.000241  1.81  -0.825 1.15  
# $value2
# # A tibble: 2 x 5
#     sim     `1`    `2`     `3`   `4`
#   <dbl>   <dbl>  <dbl>   <dbl> <dbl>
# 1     1  0.0316 -0.835 -0.0688 0.747
# 2     2 -0.426  -0.772  0.153  0.989
r2evans
  • 141,215
  • 6
  • 77
  • 149
  • 1
    I never think of dcast! Very neat solution – orange90 Apr 07 '21 at 15:33
  • Two things: the trick here is a double-reshape (longer, then wider); and `dcast`/`melt` are analogous to `pivot_longer`/`pivot_wider` from `tidyr` fame. Glad it works for you. – r2evans Apr 07 '21 at 15:34
1

My solution to this issue would be to create a nested datafrae of the results. I have provided a brief description of the method followed by a reprex.

I would do this by using pivot_wider() and pivot_longer() to reshape the data. pivot_longer is used first to make each row only contain 1 value with a label for the time, simulation and whether it is value one or two. Then using pivot_wider each row will contain the values at each time with a label for the simulation and which set of values they are. (value1 or value2).

Finally we nest the dataframe using nest which stores all the data for each set of values in a dataframe. This can be accessed as an array of dataframes by nested_vals$data if necessary where nested_vals is the object we assigned the nested dataframe to.

library(tidyverse)

#Setup data
dat <- data.frame(
  sim = rep(c(1,2), each=4),
  time = rep(1:4, 2),
  value1 = rnorm(8),
  value2 = rnorm(8)
)

# Construct nested dataframe
nested_vals <- dat %>%
  # Format dataset in tidy format
  pivot_longer(cols = c(value1, value2)) %>% 
  # Move the name of the data to the beginning of the dataframe
  relocate(name) %>% 
  # Pivot to matrix form as requested (i.e. times as columns, sims as rows)
  pivot_wider(id_cols = c(name, sim), names_from = time, values_from = value) %>% 
  # Nest results by name
  nest(-name)
#> Warning: All elements of `...` must be named.
#> Did you want `data = c(sim, `1`, `2`, `3`, `4`)`?

nested_vals
#> # A tibble: 2 x 2
#>   name   data                
#>   <chr>  <list>              
#> 1 value1 <tibble[,5] [2 x 5]>
#> 2 value2 <tibble[,5] [2 x 5]>

nested_vals$data[[2]]
#> # A tibble: 2 x 5
#>     sim     `1`      `2`    `3`    `4`
#>   <dbl>   <dbl>    <dbl>  <dbl>  <dbl>
#> 1     1  0.0639 0.250    -1.28   0.850
#> 2     2 -1.90   0.000421  0.704 -0.164

Created on 2021-04-07 by the reprex package (v2.0.0)

Joel Kandiah
  • 1,465
  • 5
  • 15
1

One more way, with a single pipe syntax

library(tidyverse)
dat %>% pivot_longer(c(value1, value2)) %>%
  group_split(name) %>% setNames(map(., ~.x[[3]][1])) %>%
  map(~ .x %>% pivot_wider(id_cols = sim, names_from = time, values_from = value))

$value1
# A tibble: 2 x 5
    sim    `1`     `2`    `3`    `4`
  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
1     1 -0.851 -0.0484 -0.656 -0.121
2     2 -0.645  1.59   -0.274  0.445

$value2
# A tibble: 2 x 5
    sim   `1`    `2`    `3`   `4`
  <dbl> <dbl>  <dbl>  <dbl> <dbl>
1     1  1.46 -1.62  -0.672 1.43 
2     2  1.65  0.790  0.495 0.162

AnilGoyal
  • 25,297
  • 4
  • 27
  • 45
1

Another approach:

library(dplyr)
library(tidyr)
wide_dat <- dat %>% pivot_wider(id_cols = sim, names_from = time, values_from = starts_with('value'))
lapply(lapply(split.default(wide_dat[-1], sub('_\\d','',names(wide_dat[-1]))), function(x) cbind(wide_dat[1],x)), setNames, c('sim', 1:4))
$value1
  sim          1         2        3         4
1   1 -0.1704969 0.2820143 1.181898 2.2377396
2   2  2.1920534 0.8214070 0.421177 0.7601796

$value2
  sim          1         2          3          4
1   1  0.1760887 0.3440053 -0.8435849  0.6729751
2   2 -0.1714095 1.5125986 -0.5739871 -0.9648294
Karthik S
  • 11,348
  • 2
  • 11
  • 25
0

A tidyverse solution could be:

library(dplyr)
library(purrr)
library(tidyr)

dat_longer <- dat %>%
  tidyr::pivot_longer(starts_with("value"), names_to="col_name", values_to="values")

list_wide <- purrr::map(unique(dat_longer[["col_name"]]),
                         ~dat_longer %>%
                           dplyr::filter(col_name==.x) %>%
                           tidyr::pivot_wider(values_from = "values", names_from="time") %>% 
                           select(-col_name)) %>% 
  purrr::set_names(unique(dat_longer[["col_name"]]))

$value1
# A tibble: 2 x 5
    sim    `1`    `2`    `3`    `4`
  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 -0.710 -0.334 -0.370  0.777
2     2  0.130  0.877  1.24  -0.202

$value2
# A tibble: 2 x 5
    sim    `1`    `2`    `3`    `4`
  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 -0.719 -0.909 0.0821 -0.158
2     2 -0.706  1.51  0.234   1.09 
luismf
  • 336
  • 1
  • 7