My original data looks like this :
I tried to write it in the for loop, but it did not work. The pivot_wider command also did not work. Any suggestion?
Two reshaping options.
tidyverse
-library(dplyr)
library(tidyr)
df %>%
group_by(source) %>%
mutate(row = row_number()) %>%
ungroup() %>%
pivot_wider(names_from = row, values_from = c(location, value))
# source location_1 location_2 location_3 location_4 value_1 value_2 value_3 value_4
# <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 a ww rr de NA 12 32 22 NA
#2 b rr tt ef ef 32 45 56 57
#3 c er eg NA NA 50 33 NA NA
data.table
-library(data.table)
dcast(setDT(df), source~rowid(source), value.var = c('location', 'value'))
Using library(data.table)
, we can do
rbindlist(lapply(unique(dt$source),
function(x) dt[source==x, as.list(c(x, unlist(.SD[,-1])))]),
fill = TRUE)
# V1 value1 value2 value3 location1 location2 location3 value4 location4
# 1: a 12 32 22 ww rr de <NA> <NA>
# 2: b 32 45 56 rr tt ef 57 ef
# 3: c 50 33 <NA> er eg <NA> <NA> <NA>
The data:
dt = data.table(source = c('a','a','a','b','b','b','b','c','c'),
value = c(12,32,22,32,45,56,57,50,33),
location= c('ww','rr','de','rr','tt','ef','ef','er','eg'))
A tidyverse option. coalesce_by_column
is by Tung.
library(dplyr)
library(tidyr)
library(stringr)
coalesce_by_column <- function(df) {
return(dplyr::coalesce(!!! as.list(df)))}
df %>%
group_by(source) %>%
mutate(valPivot = str_c('value', row_number()),
locPivot = str_c('location', row_number()),
across(everything(), ~ as.character(.))) %>%
pivot_wider(names_from = valPivot, values_from = value) %>%
pivot_wider(names_from = locPivot, values_from = location) %>%
summarise(across(everything(), coalesce_by_column)) %>%
mutate(across(starts_with('V'), ~ as.integer(.))) %>%
select(source, colnames(.)[order(str_extract(colnames(.), '.$'))][-length(colnames(.))])
# # A tibble: 3 x 9
# source value1 location1 value2 location2 value3 location3 value4 location4
# <chr> <int> <chr> <int> <chr> <int> <chr> <int> <chr>
# 1 a 12 ww 32 rr 22 de NA NA
# 2 b 32 rr 45 tt 56 ef 57 ef
# 3 c 50 er 33 eg NA NA NA NA
Data
df <- tibble(source = c('a','a','a','b','b','b','b','c','c'),
value = c(12,32,22,32,45,56,57,50,33),
location= c('ww','rr','de','rr','tt','ef','ef','er','eg'))