Using first element as starting value.
#' Compute the minimum and maximum of a vector in a single pass
#'
#' Seeds both extremes with the first element of `x` and then scans the
#' remaining elements once, updating the running minimum and maximum.
#'
#' @param x A vector whose elements can be compared with `<` and `>`.
#'   Missing values are not handled: an `NA` element makes the `if`
#'   conditions fail with an error.
#' @return A length-two vector `c(min, max)` built by subsetting `x`.
#'   For a zero-length `x` the result is simply `x[c(1L, 1L)]`.
f <- function(x) {
  r <- x[c(1L, 1L)]
  # seq_along(x)[-1L] is empty when x has zero or one element, whereas
  # 2:length(x) would count down (2, 1) and index past the end of x.
  for (i in seq_along(x)[-1L]) {
    if (x[i] < r[1L]) r[1L] <- x[i]
    if (x[i] > r[2L]) r[2L] <- x[i]
  }
  r
}
However, such loops are slow in R, so we could instead implement it using Rcpp:
# C++ source for a single-pass min/max over a numeric vector. The string is
# meant to be compiled with Rcpp::cppFunction(); the C++ function is named
# myrange and returns a length-two numeric vector (minimum, maximum).
rcppfun <- "
Rcpp::NumericVector myrange(Rcpp::NumericVector x) {
  // An empty vector has no first element to seed the extremes with,
  // so return two missing values instead of reading out of bounds.
  if (x.size() == 0) {
    return Rcpp::NumericVector::create(NA_REAL, NA_REAL);
  }
  double lo = x[0];
  double hi = x[0];
  // R_xlen_t is the type returned by x.size(); a plain int index could
  // overflow on long vectors.
  for (R_xlen_t i = 1; i < x.size(); ++i) {
    if (x[i] < lo) {
      lo = x[i];
    }
    if (x[i] > hi) {
      hi = x[i];
    }
  }
  return Rcpp::NumericVector::create(lo, hi);
}
"
library(Rcpp)

# Compile the C++ source held in rcppfun and keep the resulting R function
# under the name f_rcpp.
f_rcpp <- cppFunction(rcppfun)

# Fixed seed so the printed results below are reproducible.
set.seed(42)
x <- rnorm(1e7)

# Sanity check: both implementations must agree with base range().
# all.equal() returns a character description (not FALSE) on mismatch, so
# each comparison is wrapped in isTRUE() and passed as its own condition;
# combining the raw results with `&` would fail with an unrelated type error.
stopifnot(
  isTRUE(all.equal(range(x), f(x))),
  isTRUE(all.equal(range(x), f_rcpp(x)))
)
f(x)
# [1] -5.522383 5.537123
f_rcpp(x)
# [1] -5.522383 5.537123
f_rcpp appears to be much faster than range(). The reason for this is that
base:::range.default concatenates min(x) and max(x), i.e. essentially two for
loops are used, whereas f_rcpp uses only one. Notice that f_rcpp also works
with matrices, f_rcpp(mat), and with data frames via f_rcpp(as.matrix(df)).
# Time each implementation on the same vector x, three runs per expression.
microbenchmark::microbenchmark(
  f(x),
  f_rcpp(x),
  range(x),
  minmax(x),
  times = 3L
)
Unit: milliseconds
expr min lq mean median uq max neval cld
f(x) 1478.53334 1478.54111 1488.13588 1478.54889 1492.93715 1507.32542 3 b
f_rcpp(x) 53.66378 53.77902 54.28918 53.89426 54.60187 55.30949 3 a
range(x) 97.38360 107.07452 113.62282 116.76545 121.74244 126.71942 3 a
minmax(x) 1443.86547 1444.31277 1484.25910 1444.76007 1504.45592 1564.15176 3 b