This can be done with a single `lapply` call, in one line:
# The gaps between successive TRUE positions (counting from 0) give the length
# of each chunk; every chunk is (gap - 1) FALSE values followed by one TRUE.
# Note: any FALSE values after the last TRUE are not part of the result.
lapply(diff(c(0, which(x))), function(x) c(rep(FALSE, (x-1)), TRUE))
#[[1]]
#[1] FALSE FALSE FALSE TRUE
#[[2]]
#[1] FALSE FALSE TRUE
#[[3]]
#[1] FALSE FALSE FALSE TRUE
#[[4]]
#[1] FALSE TRUE
Explanation
- `which(x)` gives us the positions of the `TRUE` values (4, 7, 11, 13)
- Starting from 0, we want the difference between consecutive `TRUE` positions (this is the number of `FALSE` values in between, plus one for the `TRUE` itself) - `diff(c(0, which(x)))`
- 4 3 4 2
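- For each of these values we want a vector of length `x` (here `x` is the current difference, i.e. the anonymous function's argument, not the original input vector), made of `x - 1` `FALSE` values followed by one `TRUE`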
- c(rep(FALSE, (x-1)), TRUE)
- the
lapply
does this for each of the 4 3 4 2
values, and returns a list
Benchmarking
Comparing the solutions
library(microbenchmark)
# Cut `x` into consecutive pieces, opening a new piece at every index listed in
# `pos` (indices outside seq_along(x) have no effect). Returns an unnamed list.
splitAt <- function(x, pos) {
  opens_piece <- seq_along(x) %in% pos
  piece_id <- cumsum(opens_piece)
  pieces <- split(x, piece_id)
  unname(pieces)
}
# Time each candidate solution on the sample vector `x`.
# The expressions are kept in the same order as the result rows listed below.
microbenchmark(
  # 1. Split right after every TRUE with the splitAt() helper.
  splitAt(x, which(x) + 1),
  # 2. Run-length encoding: take the odd-numbered run lengths (treated as the
  #    FALSE runs) and append a TRUE to each.
  {
    r <- rle(x)$lengths
    lapply(r[seq(1, length(r), by = 2)], function(x) c(rep(FALSE, x), TRUE))
  },
  # 3. Group by the number of TRUE values strictly before each element.
  split(x, sum(x) - rev(cumsum(rev(x)))),
  # 4. String-based: paste into one string and split after each "TRUE"
  #    (this yields character strings rather than logical vectors).
  trimws(strsplit(paste(x, collapse = " "), "(?<=TRUE)", perl = TRUE)[[1]]),
  # 5. Gaps between TRUE positions, each expanded to FALSE..., TRUE.
  lapply(diff(c(0, which(x))), function(x) c(rep(FALSE, (x - 1)), TRUE))
)
# min lq mean median uq max neval
# 83.827 86.3910 91.76449 88.9155 92.8350 155.722 100
# 94.373 97.6275 105.10872 101.1455 105.8545 307.927 100
# 85.532 88.0660 93.59524 91.7935 95.3715 126.419 100
#145.233 147.8755 152.65975 150.3250 156.5910 177.807 100
# 26.451 29.6130 31.81785 31.0470 33.1895 43.267 100
Data
# Sample logical vector used throughout; TRUE sits at positions 4, 7, 11 and 13.
# Spelled with TRUE/FALSE because T and F are ordinary variables that can be
# reassigned.
x <- c(
  FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE,
  FALSE, FALSE, FALSE, TRUE, FALSE, TRUE
)