If your column `Y` is a character vector (wrap it in `as.character()` first if it is a factor), then this should do:
# Split every Y string on single spaces, label each resulting vector with
# its X, and stack the named list into a long (values, ind) data.frame.
out <- stack(
  setNames(strsplit(df$Y, " "), df$X)
)
# strsplit() yields character pieces, so convert the stacked values to numeric.
out[["values"]] <- as.numeric(out[["values"]])
Benchmarking all answers on a 2e4-row (20,000-row) data.frame:
Creating data:
# Two-row example input; Y holds space-separated numbers in a quoted field.
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer
# converts strings to factors by default; with it, Y is a factor on every
# R version, which matches the class output shown below.
df <- read.table(header = TRUE, stringsAsFactors = TRUE, text = "X Y
1 '123 234 345 456'
2 '222 333 444 555 666'")
# thanks to @MatthewPlourde for the suggestion to use replicate
# Row-bind 10000 copies of the two example rows to build the benchmark input.
df <- do.call(rbind, replicate(10000, df, simplify = FALSE))
dim(df)
# [1] 20000 2
sapply(df, class)
#         X         Y
# "integer"  "factor"
Functions:
# Arun's function
#
# Reshape `df` (columns X and Y) into long format: one row per
# space-separated piece of Y.
#
# Args:
#   df: data.frame with columns X and Y; Y is coerced with as.character()
#       and split on single spaces.
#
# Returns:
#   data.frame with columns `values` (numeric) and `ind` (the X label of
#   the row each value came from).
Arun <- function(df) {
  pieces <- strsplit(as.character(df$Y), " ")
  # Name each vector of pieces after its X so stack() records it in `ind`.
  names(pieces) <- df$X
  long <- stack(pieces)
  long$values <- as.numeric(long$values)
  long
}
# Ananda's function
#
# Reshape `Data` (columns X and Y, Y holding whitespace-separated numbers)
# into long format by parsing every Y string as one row of a table and then
# stacking the parsed columns.
#
# Args:
#   Data: data.frame with columns X and Y; Y is coerced with as.character().
#
# Returns:
#   data.frame with columns X, values and ind (ind names the parsed column,
#   "V1", "V2", ...). Rows with fewer fields than the widest row are padded
#   by fill = TRUE, so `values` contains NA for those padded cells.
Ananda <- function(Data) {
  txt <- as.character(Data$Y)
  # read.table(fill = TRUE) sizes the table from the first five lines only;
  # a longer line further down would be wrapped onto extra rows and the
  # cbind() below would then fail on mismatched row counts. Count the fields
  # of every line and pass explicit col.names so each row fits. The names
  # are the same "V<n>" names read.table() would generate itself.
  con <- textConnection(txt)
  on.exit(close(con), add = TRUE)
  n_fields <- max(count.fields(con), 1L, na.rm = TRUE)
  Data1 <- cbind(X = Data$X,
                 read.table(text = txt,
                            fill = TRUE, header = FALSE,
                            col.names = paste0("V", seq_len(n_fields))))
  data.frame(X = Data1[, 1], stack(Data1[-1]))
}
# Matthew's solution
#
# Reshape `d` (columns X and Y) into long format by grouping the Y strings
# by X and scanning each group's text into numbers.
#
# Args:
#   d: data.frame with columns X and Y; Y is coerced with as.character().
#
# Returns:
#   The result of stack() on the per-X numeric vectors. Rows that share an
#   X value are pooled into the same group by by().
Matthew <- function(d) {
  parse_group <- function(x) {
    # scan() reads whitespace-separated tokens as strings; convert afterwards.
    tokens <- scan(text = as.character(x), what = "", quiet = TRUE)
    as.numeric(tokens)
  }
  per_x <- by(d$Y, d$X, parse_group)
  stack(per_x)
}
Benchmarking:
# Attach microbenchmark with library() so a missing package stops here with
# a clear error; require() would only return FALSE and let the next line fail.
library(microbenchmark)
# Time the three implementations on the same data, 5 runs each; every
# expression also assigns its result (a1, a2, a3).
microbenchmark(a1 <- Arun(df), a2 <- Ananda(df), a3 <- Matthew(df), times = 5)
Unit: milliseconds
expr min lq median uq max neval
a1 <- Arun(df) 235.6945 258.8485 264.4166 329.2974 392.9559 5
a2 <- Ananda(df) 6661.8461 6972.2823 7825.3701 8210.9970 9454.5762 5
a3 <- Matthew(df) 3589.1784 3691.3826 3787.4163 4020.4895 5034.6580 5