UPDATE: I updated the comparison with @sindri_baldur's answer - using version 1.12.6
. According to the results, patterns()
is a handy shortcut, but if performance matters, one should stick with the ..
or with = FALSE
solution (see below).
Apparently, there is a new way of achieving this from version 1.10.2 onwards.
library(data.table)
cols <- grep("bar|baz", names(mydt), value = TRUE)
mydt[, ..cols]
It seems to work the fastest out of the posted solutions.
# Creating a large data.table with 100k rows, 32 columns
n <- 100000
foo_cols <- paste0("foo", 1:30)
big_dt <- data.table(bar = rnorm(n), baz = rnorm(n))
big_dt[, (foo_cols) := rnorm(n)]
# Methods
subsetting <- function(dt) {
subset(dt, select = grep("bar|baz", names(dt)))
}
usingSD <- function(dt) {
dt[, .SD, .SDcols = names(dt) %like% "bar|baz"]
}
usingWith <- function(dt) {
cols <- grep("bar|baz", names(dt), value = TRUE)
dt[, cols, with = FALSE]
}
usingDotDot <- function(dt) {
cols <- grep("bar|baz", names(dt), value = TRUE)
dt[, ..cols]
}
usingPatterns <- function(dt) {
dt[, .SD, .SDcols = patterns("bar|baz")]
}
# Benchmark
microbenchmark(
subsetting(big_dt), usingSD(big_dt), usingWith(big_dt), usingDotDot(big_dt), usingPatterns(big_dt),
times = 5000
)
#Unit: microseconds
# expr min lq mean median uq max neval
# subsetting(big_dt) 430 759 1672 1309 1563 82934 5000
# usingSD(big_dt) 547 951 1872 1461 1797 60357 5000
# usingWith(big_dt) 278 496 1331 1112 1304 62656 5000
# usingDotDot(big_dt) 289 483 1392 1117 1344 55878 5000
# usingPatterns(big_dt) 596 1019 1984 1518 1913 120331 5000