The simplest improvement is to start your loop from the end of the vector and work backwards, instead of starting from the front. You can then save time by exiting the loop at the first non-zero element, instead of looping through the whole vector.
I've checked this against the given vectors, and a much longer vector with a small number of zeros at the end, to show a case where looping from the start takes a lot of time.
a <- c(1, 0, 0, 0)
b <- c(0, 1, 1, 0, 0, 1)
long <- rep(c(0, 1, 0, 1, 0), c(4, 6, 5, 10000, 3))
czero
is the original function, f1
is the solution by akrun that uses rle
, fczero
starts the loop from the end, and revczero
reverses the vector, then starts from the front.
czero <- function(a) {
k = 0
for(i in 1:length(a)){
if(a[i] == 0) {
k = k + 1
} else k = 0
}
return(k)
}
f1 <- function(vec){
pmax(0, with(rle(vec), lengths[values == 0 &
seq_along(values) == length(values)])[1], na.rm = TRUE)
}
fczero <- function(vec) {
k <- 0L
for (i in length(vec):1) {
if (vec[i] != 0) break
k <- k + 1L
}
return(k)
}
revczero <- function(vec) {
revd <- rev(vec)
k <- 0L
for (i in 1:length(vec)) {
if (revd[i] != 0) break
k <- k + 1L
}
return(k)
}
Time benchmarks are below. EDIT: I've also added Grothendieck's version.
microbenchmark::microbenchmark(czero(a), f1(a), fczero(a), revczero(a), sum(!cumsum(rev(a))), times = 1000)
# Unit: nanoseconds
# expr min lq mean median uq max neval
# czero(a) 0 514 621.035 514 515 21076 1000
# f1(a) 21590 23133 34455.218 27245 30843 3211826 1000
# fczero(a) 0 514 688.892 514 515 28274 1000
# revczero(a) 2570 3085 4626.047 3599 4626 112064 1000
# sum(!cumsum(rev(a))) 2056 2571 3879.630 3085 3599 62201 1000
microbenchmark::microbenchmark(czero(b), f1(b), fczero(b), revczero(b), sum(!cumsum(rev(b))), times = 1000)
# Unit: nanoseconds
# expr min lq mean median uq max neval
# czero(b) 0 514 809.691 514 515 29815 1000
# f1(b) 22104 23647 29372.227 24675 26217 1319583 1000
# fczero(b) 0 0 400.502 0 514 26217 1000
# revczero(b) 2056 2571 3844.176 3085 3599 99727 1000
# sum(!cumsum(rev(b))) 2056 2570 3592.281 3084 3598.5 107952 1000
microbenchmark::microbenchmark(czero(long), f1(long), fczero(long), revczero(long), sum(!cumsum(rev(long))), times = 1000)
# Unit: nanoseconds
# expr min lq mean median uq max neval
# czero(long) 353156 354699 422077.536 383486 443631.0 1106250 1000
# f1(long) 112579 119775 168408.616 132627 165269.5 2068050 1000
# fczero(long) 0 514 855.444 514 1028.0 43695 1000
# revczero(long) 24161 27245 35890.991 29301 36498.0 149591 1000
# sum(!cumsum(rev(long))) 49350 53462 71035.486 56546 71454 2006363 1000