4

I have a logical vector like

as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))

but much longer. How can i transform it to:

c(0,0,1,2,3,0,1,2,0,0,0,1,2,3,4)

by counting the length of ones?

RCP9
  • 89
  • 5

7 Answers7

8

Another rle option:

r <- rle(x)
x[x] <- sequence(r$l[r$v])
#[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4

Or without saving r:

x[x] <- sequence(with(rle(x), lengths[values]))
thelatemail
  • 91,185
  • 12
  • 128
  • 188
5

with C++ through Rcpp

library(Rcpp)

cppFunction('NumericVector seqOfLogical(LogicalVector lv) {
  size_t n = lv.size();
  NumericVector res(n);
  int foundCounter = 0;
  for (size_t i = 0; i < n; i++) {
    if (lv[i] == 1) {
      foundCounter++;
    } else {
      foundCounter = 0;
    }
    res[i] = foundCounter;
  }
  return res;
}')

seqOfLogical(x)

# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4

Benchmarks

library(microbenchmark)

set.seed(1)
x <- sample(c(T,F), size = 1e6, replace = T)

microbenchmark(
    symbolix = { symbolix(x) }, 
    thelatemail1 = { thelatemail1(x) },
    thelatemail2 = { thelatemail2(x) },
    wen = { wen(x) },
    maurits = { maurits(x) },
    #mhammer = { mhammer(x) },   ## this errors
    times = 5
)

# Unit: milliseconds
#         expr         min          lq       mean      median         uq       max neval
#     symbolix    2.760152    4.579596   34.60909    4.833333   22.31126  138.5611     5
# thelatemail1  154.050925  189.784368  235.16431  235.982093  262.33704  333.6671     5
# thelatemail2  138.876834  146.197278  158.66718  148.547708  179.80223  179.9119     5
#          wen  780.432786  898.505231 1091.39099 1093.702177 1279.33318 1404.9816     5
#      maurits 1002.267323 1043.590621 1136.35624 1086.967756 1271.38803 1277.5675     5

functions

symbolix <- function(x) {
    seqOfLogical(x)
}

thelatemail1 <- function(x) {
    r <- rle(x)
    x[x] <- sequence(r$l[r$v])
    return(x)
}

thelatemail2 <- function(x) {
    x[x] <- sequence(with(rle(x), lengths[values]))
    return(x)
}

maurits <- function(x) {
    unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
}

wen <- function(A) {
    B=data.table::rleid(A)
    B=ave(B,B,FUN = seq_along)
    B[!A]=0
    B
}

mhammer <- function(x) {
    x_counts <- x
    for(i in seq_along(x)) {
      if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
    }
    return(x_counts)
}
SymbolixAU
  • 25,502
  • 4
  • 67
  • 139
  • 1
    Cheers for the benchmark testing (+1)! I'm moderately ashamed of making the bottom of the list; must be performance anxiety;-) – Maurits Evers Jun 27 '18 at 08:08
  • 1
    @MauritsEvers We all have it from time to time :) I like these types of 'looping' questions for showing how a simple knowledge of C/C++ can drastically speed-up your code. – SymbolixAU Jun 28 '18 at 00:30
1

You can using rleid in data.table

A=as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
B=data.table::rleid(A)
B=ave(B,B,FUN = seq_along)
B[!A]=0
B
[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
BENY
  • 317,841
  • 20
  • 164
  • 234
0
x <- c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1)
x_counts <- x
for(i in seq_along(x)) {
  if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
}
x_counts
MHammer
  • 1,274
  • 7
  • 12
0

Here is a solution using base R's rle with Map

x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4

or using purrr::pmap

library(purrr);
unlist(pmap(unclass(rle(x)), 
    function(lengths, values) if (!isTRUE(values)) rep(0, lengths) else 1:lengths))
#[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
Maurits Evers
  • 49,617
  • 4
  • 47
  • 68
0

slightly different from Wen's, I came up with:

library(data.table)
ave(v,rleid(v),FUN=function(x) x *seq_along(x))
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
0

I recommend runner package and function streak_run which calculates consecutive occurences. Possible also calculating on sliding windows (eg. last 5 observations), more in github documentation

x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))

streak <- streak_run(x)
streak[x == 0] <- 0

print(streak)
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
GoGonzo
  • 2,637
  • 1
  • 18
  • 25