I will edit this shortly to add a version that builds the nesting automatically from the periods in the column names (which is much more complicated). In the meantime, without automating anything, you can create the nested list by hand like so:
# Example data: two variables (var1, var2), each with a gender column and a
# raw score together with its lower and upper bounds.
df <- data.frame(
  var1.gender          = c(1, 1, 3, 3),
  var1.score.raw       = c(12.3, 12.4, 14.5, 13.2),
  var1.score.raw.lower = c(11, 11, 13, 12),
  var1.score.raw.upper = c(13, 13, 15, 14),
  var2.gender          = c(1, 1, 3, 3),
  var2.score.raw       = c(12.3, 12.4, 14.5, 13.2),
  var2.score.raw.lower = c(11, 11, 13, 12),
  var2.score.raw.upper = c(13, 13, 15, 14)
)
df

# The ".raw." segment in the lower/upper column names is not needed, so the
# columns are renamed without it.
names(df) <- c(
  "var1.gender", "var1.score.raw", "var1.score.lower", "var1.score.upper",
  "var2.gender", "var2.score.raw", "var2.score.lower", "var2.score.upper"
)

# Build the nested list by hand: one element per variable, holding the gender
# vector and a `score` sub-list with the raw, lower and upper vectors.
nested <- list(
  var1 = list(
    gender = df[["var1.gender"]],
    score = list(
      raw   = df[["var1.score.raw"]],
      lower = df[["var1.score.lower"]],
      upper = df[["var1.score.upper"]]
    )
  ),
  var2 = list(
    gender = df[["var2.gender"]],
    score = list(
      raw   = df[["var2.score.raw"]],
      lower = df[["var2.score.lower"]],
      upper = df[["var2.score.upper"]]
    )
  )
)
nested
$var1
$var1$gender
[1] 1 1 3 3
$var1$score
$var1$score$raw
[1] 12.3 12.4 14.5 13.2
$var1$score$lower
[1] 11 11 13 12
$var1$score$upper
[1] 13 13 15 14
$var2
$var2$gender
[1] 1 1 3 3
$var2$score
$var2$score$raw
[1] 12.3 12.4 14.5 13.2
$var2$score$lower
[1] 11 11 13 12
$var2$score$upper
[1] 13 13 15 14
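I tried to make a dynamic version of this but got lost thinking about recursion. Anyway, the function below should still work if you add more varX columns to your dataset. It is not as clean as doing it by hand: columns whose names have fewer parts than the deepest column name (e.g. var1.gender) still end up under a placeholder $empty element.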
# Convert a data frame into a nested list keyed by the parts of its column
# names.
#
# Every column name is split on `splitby`; the parts become successive list
# levels and the column's values are stored at the end of that path. Names with
# fewer parts than the deepest name are padded with the placeholder "empty", so
# when the deepest name has three parts the column `var1.gender` is stored at
# `$var1$gender$empty`.
#
# Arguments:
#   df       A data frame with column names.
#   splitby  Separator character(s). The value is placed inside a regular
#            expression character class ("[...]"), so each character in it
#            acts as a separator.
#
# Returns:
#   A nested list with as many levels as the deepest column name has parts.
#   At every level above the last, one element is created for each name seen
#   anywhere at that level, so a combination that has no matching column shows
#   up as an empty list. At the last level, names without a matching column
#   are omitted. A data frame with no columns yields an empty list.
nester <- function(df, splitby = ".") {
  if (ncol(df) == 0L) {
    return(list())
  }

  parts <- strsplit(colnames(df), paste0("[", splitby, "]"))
  depth <- max(lengths(parts))

  # Pad every name to the same number of parts (missing parts become NA) so
  # the paths can be stacked into a matrix: one row per column of `df`, one
  # matrix column per nesting level.
  paths <- do.call(rbind, lapply(parts, function(p) {
    length(p) <- depth
    p
  }))
  paths[is.na(paths)] <- "empty"

  # Unique names per level. lapply() is used rather than apply() because
  # apply() simplifies to a matrix when every level has the same number of
  # unique names, which breaks per-level `[[` indexing.
  level_names <- lapply(seq_len(depth), function(j) unique(paths[, j]))

  # Recursively build the list for `level`, restricted to the columns of `df`
  # whose path matches the names chosen at the levels above (`rows` mask).
  build <- function(level, rows) {
    node <- list()
    for (nm in level_names[[level]]) {
      hit <- rows & paths[, level] == nm
      if (level < depth) {
        node[[nm]] <- build(level + 1L, hit)
      } else if (any(hit)) {
        # Test for a matching column, not for the length of the extracted
        # data, so that zero-row columns are kept.
        value <- df[, which(hit)]
        if (nm == "empty") {
          # Data that sits under the padding placeholder is stored as a bare,
          # unnamed vector.
          value <- as.vector(unname(unlist(value)))
        }
        node[[nm]] <- value
      }
    }
    node
  }

  build(1L, rep(TRUE, nrow(paths)))
}
# Nest the example data frame by splitting its column names on "." (the
# default separator of nester()) and show the result.
df.List <- nester(df)
print(df.List)