Unfortunately, `tfread` doesn't seem to exist.
Timing of the proposed solutions, reported as (200 observations of 20000 character/integer variables) / (20000 observations of 200 character/integer variables):
- `readLines`-`fread` (@jan-glx): 7 s / 1.2 s
- `fread`-`transpose`-`paste`-`read.table` (@Onymambu): 8 s / 36 s
- all-within-j: `fread`-`transpose`-`write.csv`-`paste`-`fread` (@Clayton Stanley): 5 min / 12 s
- command line transpose-`fread` (@jan-glx): 2.4 s / 1.6 s
- iotools-paste-paste-fread (@jan-glx): 1.4 s / 1.2 s
- `fread`-`transpose`-`type.convert` (@Frank): 4.2 s / 3.6 s
Code:
library(data.table)
# Benchmark fixture: a comma-separated file stored "sideways" -- every line
# holds one variable: its name followed by all of its values.
file <- tempfile("tmp.txt")
p <- 100 # 2 * p = 200 lines in the file (columns once transposed)
n <- 10000 # 2 * n = 20000 values per line (rows once transposed)
name_line <- paste(
  "Name",
  paste0(rep(c("Peter", "Paul"), n), collapse = ","),
  sep = ","
)
age_line <- paste(
  "Age",
  paste0(rep(c("40", "5"), n), collapse = ","),
  sep = ","
)
# writeLines() does not close a connection that is already open, so bind the
# connection to a name and close it explicitly instead of creating it inline
# (which would leave it open until garbage collection).
con <- file(file, "wb")
writeLines(rep(c(name_line, age_line), p), con)
close(con)
# Approach 1 (readLines-fread): parse every line of the file as its own
# one-column table, then bind those columns side by side.
system.time({ # 1
  lines <- readLines(file)
  # Swap each comma for a newline so that a line becomes one value per row,
  # starting with the variable name.
  lines <- lapply(lines, gsub, pattern = ",", replacement = "\n", fixed = TRUE)
  # fread() receives the newline-separated text itself, not a file path.
  lines <- lapply(lines, fread)
  dt <- do.call(cbind, lines)
  dim(dt)
})
# Approach 2 (fread-transpose-paste-read.table): read the file as-is,
# transpose it, glue each transposed row into one space-separated text line
# and let read.table() re-parse that text (first line = column names).
system.time({ # 2
  # Argument names are spelled out and TRUE/FALSE used instead of the
  # reassignable T/F, so the call does not depend on partial matching.
  DT <- setDT(read.table(
    text = do.call(paste, transpose(fread(file, header = FALSE))),
    header = TRUE,
    stringsAsFactors = FALSE
  ))
  dim(DT)
})
# Approach 3 (all-within-j): the whole transformation is written as a single
# chain of data.table `[...]` calls; the result is delivered through the
# variable bTbl rather than as the value of the chain.
system.time({ # 3
# Read every field as character, without treating the first line as a header.
aTbl = fread(file, colClasses="character", header=F)
# invisible() keeps the value of the chain from being printed.
invisible(
aTbl[, .SD
# Transpose: the lines of the file become columns.
][, transpose(.SD)
# Name the columns after the values found in the first row.
# NOTE(review): relies on setnames() being usable on .SD here -- confirm
# against the data.table version in use.
][, setnames(.SD, .SD[1, t(.SD)])
# Keep rows 2..N, i.e. drop the row that supplied the column names.
][2:.N
# Serialise the table to CSV text (captured from stdout) and parse that text
# again with fread -- presumably so that column types are re-detected, since
# everything was read as character above.
][, fread(paste0(capture.output(write.csv(.SD, stdout(), row.names=F, quote=F)), collapse='\n'))
# Store a copy of the result in bTbl via superassignment, and pass .SD on as
# the value of this step.
][, {bTbl <<- copy(.SD); .SD}
]
)
dim(bTbl)
})
# Approach 4 (command line transpose-fread), "wide" variant: an external
# `transpose` program does the transposition and fread() parses its output.
# NOTE(review): `-l 20005x205` looks like the tool's size limit for this
# input shape -- confirm against the transpose tool's documentation.
system.time({ # 4 wide
  # The string contains spaces, so fread() is expected to run it as a shell
  # command rather than open it as a file; the path is wrapped in double
  # quotes for the shell.
  shell_cmd <- paste0("transpose -t -l 20005x205 --fsep , \"", file, "\"")
  dt <- fread(shell_cmd)
  dim(dt)
})
# Approach 4 (command line transpose-fread), "long" variant: an external
# `transpose` program does the transposition and fread() parses its output.
# NOTE(review): `-l 205x20005` looks like the tool's size limit for this
# input shape -- confirm against the transpose tool's documentation.
system.time({ # 4 long
  # The string contains spaces, so fread() is expected to run it as a shell
  # command rather than open it as a file; the path is wrapped in double
  # quotes for the shell.
  shell_cmd <- paste0("transpose -t -l 205x20005 --fsep , \"", file, "\"")
  dt <- fread(shell_cmd)
  dim(dt)
})
# Approach 5 (iotools-paste-paste-fread): stream the file through iotools in
# chunks; each chunk is split into a character matrix, rebuilt as transposed
# CSV text and parsed by fread(); the per-chunk tables are merged with cbind.
system.time({ # 5
  infile <- file(file, "rb")
  df <- iotools::chunk.tapply(infile, function(x) {
    cells <- iotools::mstrsplit(x, sep = ",")
    # Join every matrix column with commas, then stack the joined columns as
    # lines: the text handed to fread() is the transposed chunk.
    fread(paste0(apply(cells, 2, paste0, collapse = ","), collapse = "\n"))
  }, CH.MERGE = cbind)
  # This script opened the connection, so it closes it as well.
  # NOTE(review): assumes chunk.tapply() leaves a caller-supplied connection
  # open -- confirm against the iotools documentation.
  close(infile)
  dim(df)
})
# Approach 6 (fread-transpose-type.convert): read the file as-is, transpose
# everything except the first column (the variable names), convert each
# resulting column to its natural type and attach the names.
system.time({ # 6
  d <- fread(file, header = FALSE)
  # as.is = TRUE is passed explicitly: without it type.convert() warns on
  # R >= 4.0 and turns character columns into factors on older versions.
  # d[[1]] is evaluated before d is reassigned, so it still refers to the
  # first column of the table read above.
  d <- d[, lapply(transpose(.SD[, -1]), type.convert, as.is = TRUE)][
    , setnames(.SD, d[[1]])
  ]
  dim(d)
})