I have a logical vector like
as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
but much longer. How can i transform it to:
c(0,0,1,2,3,0,1,2,0,0,0,1,2,3,4)
by counting the length of ones?
Another rle
option:
r <- rle(x)
x[x] <- sequence(r$l[r$v])
#[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
Or without saving r
:
x[x] <- sequence(with(rle(x), lengths[values]))
with C++ through Rcpp
library(Rcpp)
cppFunction('NumericVector seqOfLogical(LogicalVector lv) {
size_t n = lv.size();
NumericVector res(n);
int foundCounter = 0;
for (size_t i = 0; i < n; i++) {
if (lv[i] == 1) {
foundCounter++;
} else {
foundCounter = 0;
}
res[i] = foundCounter;
}
return res;
}')
seqOfLogical(x)
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
library(microbenchmark)
set.seed(1)
x <- sample(c(T,F), size = 1e6, replace = T)
microbenchmark(
symbolix = { symbolix(x) },
thelatemail1 = { thelatemail1(x) },
thelatemail2 = { thelatemail2(x) },
wen = { wen(x) },
maurits = { maurits(x) },
#mhammer = { mhammer(x) }, ## this errors
times = 5
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# symbolix 2.760152 4.579596 34.60909 4.833333 22.31126 138.5611 5
# thelatemail1 154.050925 189.784368 235.16431 235.982093 262.33704 333.6671 5
# thelatemail2 138.876834 146.197278 158.66718 148.547708 179.80223 179.9119 5
# wen 780.432786 898.505231 1091.39099 1093.702177 1279.33318 1404.9816 5
# maurits 1002.267323 1043.590621 1136.35624 1086.967756 1271.38803 1277.5675 5
symbolix <- function(x) {
seqOfLogical(x)
}
thelatemail1 <- function(x) {
r <- rle(x)
x[x] <- sequence(r$l[r$v])
return(x)
}
thelatemail2 <- function(x) {
x[x] <- sequence(with(rle(x), lengths[values]))
return(x)
}
maurits <- function(x) {
unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
}
wen <- function(A) {
B=data.table::rleid(A)
B=ave(B,B,FUN = seq_along)
B[!A]=0
B
}
mhammer <- function(x) {
x_counts <- x
for(i in seq_along(x)) {
if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
}
return(x_counts)
}
You can using rleid
in data.table
A=as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
B=data.table::rleid(A)
B=ave(B,B,FUN = seq_along)
B[!A]=0
B
[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
x <- c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1)
x_counts <- x
for(i in seq_along(x)) {
if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
}
x_counts
Here is a solution using base R's rle
with Map
x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
or using purrr::pmap
library(purrr);
unlist(pmap(unclass(rle(x)),
function(lengths, values) if (!isTRUE(values)) rep(0, lengths) else 1:lengths))
#[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
slightly different from Wen's, I came up with:
library(data.table)
ave(v,rleid(v),FUN=function(x) x *seq_along(x))
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
I recommend runner package and function streak_run
which calculates consecutive occurences. Possible also calculating on sliding windows (eg. last 5 observations), more in github documentation
x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
streak <- streak_run(x)
streak[x == 0] <- 0
print(streak)
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4