Very good question. A tidyverse
approach. This approach will have combination of uneven number of columns per group. Explanation -
- Data is divided into a list with each sub-group as a separate item in the list. For this division
- Firstly, the data is pivoted long using
pivot_longer
- then a dummy group (sub-group identification) column in created using
gsub
. You may use str_replace
too.
- list created using
dplyr::group_split
- data in all items reshaped back to its original form using
tidyr::pivot_wider
inside purrr::map
now
- thereafter all individual items of list -
- first combined using
combn
and Reduce
. You may also use purrr::reduce
here
- secondly names of new columns generated using same
combn
and Reduce
- these names bound above matrix into a named dataframe.
- lastly, using
purrr::reduce
in conjunction with dplyr::left_join
list is converted back to intended shape
set.seed(123)
a1 <- rnorm(n = 10)
b1 <- rnorm(n = 10)
c1 <- rnorm(n = 10)
a2 <- rnorm(n = 10)
b2 <- rnorm(n = 10)
c2 <- rnorm(n = 10)
Original <- data.frame(
date = seq(Sys.Date()-9, Sys.Date(), by = 1),
a1 = a1,
b1 = b1,
c1 = c1,
a2 = a2,
b2 = b2,
c2 = c2)
library(tidyverse)
Original %>% pivot_longer(!date) %>%
mutate(grp = gsub('^\\D*(\\d)+$', '\\1', name)) %>%
group_split(grp, .keep = F) %>%
map(~ .x %>% pivot_wider(names_from = name, values_from = value)) %>%
map(~ combn(.x[-1], 2, FUN = Reduce, f = `*`) %>% as.data.frame() %>%
setNames(combn(names(.x[-1]), 2, FUN = Reduce, f = paste0)) %>% cbind(.x[1], .)) %>%
reduce(~left_join(.x, .y, by = 'date'))
date a1b1 a1c1 b1c1 a2b2 a2c2 b2c2
1 2021-05-28 -0.68606804 0.59848918 -1.30710356 -0.29626767 0.108031283 -0.175982140
2 2021-05-29 -0.08282104 0.05017292 -0.07843039 0.06135046 0.008423333 0.005935364
3 2021-05-30 0.62468579 -1.59924166 -0.41119329 -1.13268875 -0.038374446 0.054248120
4 2021-05-31 0.00780406 -0.05139295 -0.08067566 1.90463287 1.201815497 2.968438088
5 2021-06-01 -0.07186344 -0.08080991 0.34742254 0.99243873 -0.185489171 -0.272722771
6 2021-06-02 3.06467216 -2.89278864 -3.01397443 -0.77341778 1.044302702 -1.703161152
7 2021-06-03 0.22946735 0.38614963 0.41709268 -0.22316502 -0.857881519 0.623969018
8 2021-06-04 2.48789113 -0.19402639 -0.30162620 0.02889143 -0.036194437 -0.272813136
9 2021-06-05 -0.48172830 0.78173260 -0.79823906 -0.23864021 -0.037894774 0.096601990
10 2021-06-06 0.21070515 -0.55877763 -0.59279292 0.03171951 -0.082159505 -0.018002847
Check it for this extended dataset
set.seed(123)
a1 <- rnorm(n = 10)
b1 <- rnorm(n = 10)
c1 <- rnorm(n = 10)
a2 <- rnorm(n = 10)
b2 <- rnorm(n = 10)
c2 <- rnorm(n = 10)
d2 <- rnorm(n = 10)
Original <- data.frame(
date = seq(Sys.Date()-9, Sys.Date(), by = 1),
a1 = a1,
b1 = b1,
c1 = c1,
a2 = a2,
b2 = b2,
c2 = c2,
d2 = d2)
library(tidyverse)
Original %>% pivot_longer(!date) %>%
mutate(grp = gsub('^\\D*(\\d)+$', '\\1', name)) %>%
group_split(grp, .keep = F) %>%
map(~ .x %>% pivot_wider(names_from = name, values_from = value)) %>%
map(~ combn(.x[-1], 2, FUN = Reduce, f = `*`) %>% as.data.frame() %>%
setNames(combn(names(.x[-1]), 2, FUN = Reduce, f = paste0)) %>% cbind(.x[1], .)) %>%
reduce(~left_join(.x, .y, by = 'date'))
date a1b1 a1c1 b1c1 a2b2 a2c2 a2d2 b2c2 b2d2 c2d2
1 2021-05-28 -0.68606804 0.59848918 -1.30710356 -0.29626767 0.108031283 0.161902656 -0.175982140 -0.26373820 0.09616971
2 2021-05-29 -0.08282104 0.05017292 -0.07843039 0.06135046 0.008423333 0.148221326 0.005935364 0.10444173 0.01433970
3 2021-05-30 0.62468579 -1.59924166 -0.41119329 -1.13268875 -0.038374446 -0.298262480 0.054248120 0.42163941 0.01428475
4 2021-05-31 0.00780406 -0.05139295 -0.08067566 1.90463287 1.201815497 -0.894445153 2.968438088 -2.20924515 -1.39402460
5 2021-06-01 -0.07186344 -0.08080991 0.34742254 0.99243873 -0.185489171 -0.880563395 -0.272722771 -1.29468307 0.24197936
6 2021-06-02 3.06467216 -2.89278864 -3.01397443 -0.77341778 1.044302702 0.209022041 -1.703161152 -0.34089562 0.46029226
7 2021-06-03 0.22946735 0.38614963 0.41709268 -0.22316502 -0.857881519 0.248271309 0.623969018 -0.18057692 -0.69416615
8 2021-06-04 2.48789113 -0.19402639 -0.30162620 0.02889143 -0.036194437 -0.003281582 -0.272813136 -0.02473471 0.03098700
9 2021-06-05 -0.48172830 0.78173260 -0.79823906 -0.23864021 -0.037894774 -0.282179411 0.096601990 0.71933645 0.11422674
10 2021-06-06 0.21070515 -0.55877763 -0.59279292 0.03171951 -0.082159505 -0.779997773 -0.018002847 -0.17091365 0.44269850
Created on 2021-06-06 by the reprex package (v2.0.0)