27

I want to write code that counts and sums each consecutive run of positive or negative numbers.
Numbers are either positive or negative (no zeros).
I have written the code with for loops. Is there a more creative alternative?

Data

R

set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)

python

x = [-0.01, 0.003, -0.002, 0.018, 0.002, 0.006, -0.012, 0.014, -0.017, -0.007,

     0.002, 0.002, -0.004, 0.015, 0.002, -0.001, -0.008, 0.01, -0.018, 0.046]

loops

R

# +1 for positive values, -1 for negative values (the data contains no zeros)
sign_indicator <- ifelse(x > 0, 1,-1)
# Length of the same-sign run ending at each position (filled in below)
number_of_sequence <- rep(NA, 20)
n <- 1
# Compare each sign with its predecessor: extend the current run or restart it
for (i in 2:20) {
  if (sign_indicator[i] == sign_indicator[i - 1]) {
    n <- n + 1
  } else{
    n <- 1
  }
  number_of_sequence[i] <- n
  
}
# The loop starts at 2, so the first position is set by hand
number_of_sequence[1] <- 1

#############################

# Running sum of the current run at each position
summation <- rep(NA, 20)

for (i in 1:20) {
  # i:(i + 1 - number_of_sequence[i]) indexes back from i to the start of its run
  summation[i] <- sum(x[i:(i + 1 - number_of_sequence[i])])
}

python

# Sign of every element: +1 for positive, -1 for negative (the data has no zeros).
# The data list is named `x` (lower case); using `X` here raised a NameError.
sign_indicator = [1 if i > 0 else -1 for i in x]

# Length of the same-sign run ending at each position
number_of_sequence = [1]
N = 1
for i in range(1, len(sign_indicator)):
    if sign_indicator[i] == sign_indicator[i - 1]:
        N += 1
    else:
        N = 1
    number_of_sequence.append(N)

#############################
# Running sum of the current run at each position
summation = []

for i in range(len(x)):
    if number_of_sequence[i] == 1:
        summation.append(x[i])
    else:
        # Slice covers the last number_of_sequence[i] elements ending at i
        summation.append(sum(x[(i + 1 - number_of_sequence[i]):(i + 1)]))

result

        x n_of_sequence    sum
1  -0.010             1 -0.010
2   0.003             1  0.003
3  -0.002             1 -0.002
4   0.018             1  0.018
5   0.002             2  0.020
6   0.006             3  0.026
7  -0.012             1 -0.012
8   0.014             1  0.014
9  -0.017             1 -0.017
10 -0.007             2 -0.024
11  0.002             1  0.002
12  0.002             2  0.004
13 -0.004             1 -0.004
14  0.015             1  0.015
15  0.002             2  0.017
16 -0.001             1 -0.001
17 -0.008             2 -0.009
18  0.010             1  0.010
19 -0.018             1 -0.018
20  0.046             1  0.046
Community
  • 1
  • 1
Iman
  • 2,224
  • 15
  • 35

14 Answers14

18

The other solutions look okay but you don't really need to use sophisticated language features or library functions for this simple problem.

result, prev = [], None

for idx, cur in enumerate(x):
    if not prev or (prev > 0) != (cur > 0):
        n, summation = 1, cur
    else:
        n, summation = n + 1, summation + cur
    result.append((idx, cur, n, summation))
    prev = cur

As you can see, you don't really need sign_indicator list, two for-loops or range function as in the snippet in the question section.

If you want index to start from 1, use enumerate(x, 1) instead of enumerate(x)

To see the result, you can run the following code

for idx, num, length, summation in result:
     print(f"{idx:>2d} {num:.3f} {length:>2d} {summation:.3f}")
14

In R, you can use data.table's rleid to create groups for each positive and negative series of numbers, then create a sequence of row numbers within each group and take a cumulative sum of the x values.

library(data.table)
df <- data.table(x)
df[, c("n_of_sequence", "sum") := list(seq_len(.N), cumsum(x)), by = rleid(sign(x))]
df

#         x n_of_sequence    sum
# 1: -0.010             1 -0.010
# 2:  0.003             1  0.003
# 3: -0.002             1 -0.002
# 4:  0.018             1  0.018
# 5:  0.002             2  0.020
# 6:  0.006             3  0.026
# 7: -0.012             1 -0.012
# 8:  0.014             1  0.014
# 9: -0.017             1 -0.017
#10: -0.007             2 -0.024
#11:  0.002             1  0.002
#12:  0.002             2  0.004
#13: -0.004             1 -0.004
#14:  0.015             1  0.015
#15:  0.002             2  0.017
#16: -0.001             1 -0.001
#17: -0.008             2 -0.009
#18:  0.010             1  0.010
#19: -0.018             1 -0.018
#20:  0.046             1  0.046

We can use rleid in dplyr as well to create groups and do the same.

library(dplyr)
df %>%
  group_by(gr = data.table::rleid(sign(x))) %>%
  mutate(n_of_sequence = row_number(), sum = cumsum(x))
Oliver
  • 8,169
  • 3
  • 15
  • 37
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
10

You can calculate the run lengths of each sign using rle from base R and do something like this.

# Same seed as the question, so the printed output below is reproducible
set.seed(100)
z <- round(rnorm(20, sd = 0.02), 3)
# Lengths of the consecutive same-sign runs in z
run_lengths <- rle(sign(z))$lengths
run_lengths
# [1] 1 1 1 3 1 1 2 2 1 2 2 1 1 1

To get n_of_sequence

# sequence() expands every run length k into 1..k; base R only, no purrr needed
n_of_sequence <- sequence(run_lengths)
n_of_sequence
# [1] 1 1 1 1 2 3 1 1 1 2 1 2 1 1 2 1 2 1 1 1

Finally, to get the summations of the sequences,

start <- cumsum(c(1, run_lengths))
start <- start[-length(start)] # start points of each series
# Cumulative sum inside each run; base R Map() pairs each start with its length
unlist(Map(function(s, len) cumsum(z[s:(s + len - 1)]), start, run_lengths))
# [1] -0.010  0.003 -0.002  0.018  0.020  0.026 -0.012  0.014 -0.017 -0.024
# [11]  0.002  0.004 -0.004  0.015  0.017 -0.001 -0.009  0.010 -0.018  0.046
Ameer
  • 496
  • 4
  • 5
7

I recommend the R package runner for this kind of operation. streak_run calculates the number of consecutive occurrences of the same value, and sum_run calculates the sum in a window whose length is defined by the k argument.

Here is solution:

set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)

n_of_sequence <- runner::streak_run(x > 0)
sum <- runner::sum_run(x, k = n_of_sequence)

data.frame(x, n_of_sequence, sum)

#         x n_of_sequence    sum
# 1  -0.010             1 -0.010
# 2   0.003             1  0.003
# 3  -0.002             1 -0.002
# 4   0.018             1  0.018
# 5   0.002             2  0.020
# 6   0.006             3  0.026
# 7  -0.012             1 -0.012
# 8   0.014             1  0.014
# 9  -0.017             1 -0.017
# 10 -0.007             2 -0.024
# 11  0.002             1  0.002
# 12  0.002             2  0.004
# 13 -0.004             1 -0.004
# 14  0.015             1  0.015
# 15  0.002             2  0.017
# 16 -0.001             1 -0.001
# 17 -0.008             2 -0.009
# 18  0.010             1  0.010
# 19 -0.018             1 -0.018
# 20  0.046             1  0.046

Below is a benchmark comparing the current solutions

set.seed(0)
x <- round(rnorm(10000, sd = 0.02), 3)

library(runner)
runner_streak <- function(x) {
  n_of_sequence <- streak_run(x > 0)
  sum <- sum_run(x, k = n_of_sequence)
}

library(data.table)
dt <- data.table(x)
dt_streak <- function(dt) {
  dt[, c("n_of_sequence", "sum") := list(seq_len(.N), cumsum(x)),rleid(sign(x))]
}

rle_streak <- function(x) {
  run_lengths <- rle(sign(x))$lengths
  run_lengths

  n_of_sequence <- run_lengths %>% map(seq) %>% unlist

  start <- cumsum(c(1,run_lengths))
  start <- start[-length(start)]
  sum <- map2(start,run_lengths,~cumsum(x[.x:(.x+.y-1)])) %>% unlist()
}

library(tidyverse)
df <- tibble(x = x)
# Tidyverse variant of the streak count / running sum.
# `x` is the tibble to process (it must have a numeric column named `x`).
# The pipeline starts from the argument rather than the global `df`, so the
# function measures what it is given. Inside mutate(), `x` resolves to the
# column through dplyr's data masking.
# Returns the tibble with `n_of_sequence` and `sum` added, invisibly.
tv_streak <- function(x) {
  res <- x %>%
    mutate(seqno = cumsum(c(1, diff(sign(x)) != 0))) %>%
    group_by(seqno) %>%
    mutate(n_of_sequence = row_number(),
           sum = cumsum(x)) %>%
    ungroup() %>%
    select(-seqno)
  invisible(res)
}

count_and_sum <- function(x) {
  runs   <- rle((x > 0) * 1)$lengths
  groups <- split(x, rep(1:length(runs), runs))
  output <- function(group) 
    data.frame(x = group, n = seq_along(group), sum = cumsum(group))
  result <- as.data.frame(do.call(rbind, lapply(groups, output)))
  `rownames<-`(result, 1:nrow(result))
}
microbenchmark::microbenchmark(
  runner_streak(x),
  dt_streak(dt),
  rle_streak(x),
  tv_streak(df),
  count_and_sum(x),
  times = 100L
)


# Unit: milliseconds
#             expr         min          lq        mean      median          uq        max neval
# runner_streak(x)    4.240192    4.833563    6.321697    5.300817    6.543926   14.80221   100
#    dt_streak(dt)    7.648100    8.587887   10.862806    9.650483   11.295488   34.66027   100
#    rle_streak(x)   42.321506   55.397586   64.195692   63.404403   67.813738  167.71444   100
#    tv_streak(df)   31.398885   36.333751   45.141452   40.800077   45.756279  163.19535   100
# count_and_sum(x) 1691.438977 1919.518282 2306.036783 2149.543281 2499.951020 6158.43384   100
GoGonzo
  • 2,637
  • 1
  • 18
  • 25
  • 1
    measuring in microseconds doesn't make much sense. Some functions have an initial overhead in microseconds but they will scale for big data sets much better than others. Also `df <- data.table(x)` is a full data copy. Also, you are printing the data in some examples (which is a another full copy) while not in others. – David Arenburg Feb 20 '20 at 14:50
  • some of the functions return different objects - some vectors and some dataframes - so it is still not quite a fair benchmark. Also some give different results. Try `r = runner_streak(x); d = dt_streak(dt) ; all.equal(r, d$sum)`. Only checked a few but `tv_streak` gives the same as `dt_streak` ; `count_and_sum` gives the same as `runner_streak` which are different from the preceding two. – user2957945 Feb 23 '20 at 01:03
6

Here's a simple non-looping function in R:

#' Count and cumulatively sum consecutive same-sign runs
#'
#' @param x A numeric vector of positive and negative values.
#' @return A data frame with one row per element of `x` and columns
#'   `x` (the input), `n` (position within the current same-sign run) and
#'   `sum` (running sum within that run). Empty input gives a zero-row
#'   data frame with the same columns.
count_and_sum <- function(x)
{
  # Empty input: return the empty result directly instead of erroring
  if (length(x) == 0L) {
    return(data.frame(x = numeric(0), n = integer(0), sum = numeric(0)))
  }
  runs   <- rle(x > 0)$lengths
  # One group id per element, so the whole vector is processed in a single
  # pass instead of rbind-ing one small data frame per run
  run_id <- rep(seq_along(runs), runs)
  data.frame(x = x, n = sequence(runs), sum = ave(x, run_id, FUN = cumsum))
}

So you can do:

set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)
count_and_sum(x)
#>         x n    sum
#> 1  -0.010 1 -0.010
#> 2   0.003 1  0.003
#> 3  -0.002 1 -0.002
#> 4   0.018 1  0.018
#> 5   0.002 2  0.020
#> 6   0.006 3  0.026
#> 7  -0.012 1 -0.012
#> 8   0.014 1  0.014
#> 9  -0.017 1 -0.017
#> 10 -0.007 2 -0.024
#> 11  0.002 1  0.002
#> 12  0.002 2  0.004
#> 13 -0.004 1 -0.004
#> 14  0.015 1  0.015
#> 15  0.002 2  0.017
#> 16 -0.001 1 -0.001
#> 17 -0.008 2 -0.009
#> 18  0.010 1  0.010
#> 19 -0.018 1 -0.018
#> 20  0.046 1  0.046

Created on 2020-02-16 by the reprex package (v0.3.0)

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
6

Two different lazy solutions in Python, using the itertools module.

Using itertools.groupby (and accumulate)

from itertools import accumulate, groupby

result = (
    item
    for _, group in groupby(x, key=lambda n: n < 0)
    for item in enumerate(accumulate(group), 1)
)

Using itertools.accumulate with a custom accumulation function

from itertools import accumulate

def sign_count_sum(count_sum, value):
    """Accumulation step: fold `value` into a (run length, run sum) pair.

    `count_sum` is the pair produced for the previous element. The pair is
    extended when `value` is on the same side of zero as the previous sum,
    and restarted at (1, value) otherwise.
    """
    run_length, running_total = count_sum
    if (running_total < 0) is not (value < 0):
        # Sign flipped: a new run begins with this value
        return 1, value
    return run_length + 1, running_total + value

result = accumulate(x, sign_count_sum, initial=(0, 0))
next(result)  # needed to skip the initial (0, 0) item

The initial keyword argument was added in Python 3.8. In earlier versions you can use itertools.chain to prepend the (0,0)-tuple:

from itertools import accumulate, chain  # chain must be imported as well

result = accumulate(chain([(0, 0)], x), sign_count_sum)
next(result)  # the prepended (0, 0) seed is still emitted first; skip it

The output is as expected:

for (i, v), (c, s) in zip(enumerate(x), result):
    print(f"{i:3} {v:7.3f} {c:3} {s:7.3f}")
  0  -0.010   1  -0.010
  1   0.003   1   0.003
  2  -0.002   1  -0.002
  3   0.018   1   0.018
  4   0.002   2   0.020
  5   0.006   3   0.026
  6  -0.012   1  -0.012
  7   0.014   1   0.014
  8  -0.017   1  -0.017
  9  -0.007   2  -0.024
 10   0.002   1   0.002
 11   0.002   2   0.004
 12  -0.004   1  -0.004
 13   0.015   1   0.015
 14   0.002   2   0.017
 15  -0.001   1  -0.001
 16  -0.008   2  -0.009
 17   0.010   1   0.010
 18  -0.018   1  -0.018
 19   0.046   1   0.046
schot
  • 10,958
  • 2
  • 46
  • 71
5

Here is a simple tidyverse solution...

library(tidyverse) #or just dplyr and tidyr

set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)

df <- tibble(x = x) %>% 
  mutate(seqno = cumsum(c(1, diff(sign(x)) != 0))) %>% #identify sequence ids
  group_by(seqno) %>%                                  #group by sequences
  mutate(n_of_sequence = row_number(),                 #count row numbers for each group
         sum = cumsum(x)) %>%                          #cumulative sum for each group
  ungroup() %>% 
  select(-seqno)                                       #remove sequence id

df
# A tibble: 20 x 3
        x n_of_sequence     sum
    <dbl>         <int>   <dbl>
 1 -0.01              1 -0.01  
 2  0.003             1  0.003 
 3 -0.002             1 -0.002 
 4  0.018             1  0.018 
 5  0.002             2  0.0200
 6  0.006             3  0.026 
 7 -0.012             1 -0.012 
 8  0.014             1  0.014 
 9 -0.017             1 -0.017 
10 -0.007             2 -0.024 
11  0.002             1  0.002 
12  0.002             2  0.004 
13 -0.004             1 -0.004 
14  0.015             1  0.015 
15  0.002             2  0.017 
16 -0.001             1 -0.001 
17 -0.008             2 -0.009 
18  0.01              1  0.01  
19 -0.018             1 -0.018 
20  0.046             1  0.046 
Andrew Gustar
  • 17,295
  • 1
  • 22
  • 32
5

As for Python, someone will come up with a solution using the pandas library. In the meantime, here is a simple proposal:

class Combiner:
    """Stateful helper that tracks the current same-sign run.

    Each call to combine() returns (overall index, value, position within
    the current run, running sum of the current run).
    """

    def __init__(self):
        self.index = 0
        self.seq_index = 0
        self.summation = 0

    def combine(self, value):
        self.index += 1
        # A non-positive product means the sign changed (or the running sum
        # is still 0, as on the very first call): start a new run.
        starts_new_run = value * self.summation <= 0
        if not starts_new_run:
            self.seq_index += 1
            self.summation += value
        else:
            self.seq_index, self.summation = 1, value
        return self.index, value, self.seq_index, self.summation

c = Combiner()
lst = [c.combine(v) for v in x]

for t in lst:
    print(f"{t[0]:3} {t[1]:7.3f} {t[2]:3} {t[3]:7.3f}")

Output:

  1  -0.010   1  -0.010
  2   0.003   1   0.003
  3  -0.002   1  -0.002
  4   0.018   1   0.018
  5   0.002   2   0.020
  6   0.006   3   0.026
  7  -0.012   1  -0.012
  8   0.014   1   0.014
  9  -0.017   1  -0.017
 10  -0.007   2  -0.024
 11   0.002   1   0.002
 12   0.002   2   0.004
 13  -0.004   1  -0.004
 14   0.015   1   0.015
 15   0.002   2   0.017
 16  -0.001   1  -0.001
 17  -0.008   2  -0.009
 18   0.010   1   0.010
 19  -0.018   1  -0.018
 20   0.046   1   0.046

If you need separate lists, you can do

idxs, vals, seqs, sums = (list(tpl) for tpl in zip(*lst))

or, if iterators are OK, simply

idxs, vals, seqs, sums = zip(*lst)

(explanation here)

Walter Tross
  • 12,237
  • 2
  • 40
  • 64
4

In R, you could also do:

# DATA
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)

library(data.table)
dt <- data.table(x = x)

# Create Positive or Negative variable
dt$x_logical <- ifelse(dt$x > 0, "P", "N")

# Create a reference data.frame/table to keep continuous counts
seq_dt <- data.frame(val = rle(x = dt$x_logical)$lengths)
seq_dt$id <- 1:nrow(seq_dt)

# Map id in the main data.table and get cumulative sum
dt$id <- rep(seq_dt$id, seq_dt$val)
dt[, csum := cumsum(x), by = "id"]


        x x_logical id   csum
 1: -0.010         N  1 -0.010
 2:  0.003         P  2  0.003
 3: -0.002         N  3 -0.002
 4:  0.018         P  4  0.018
 5:  0.002         P  4  0.020
 6:  0.006         P  4  0.026
 7: -0.012         N  5 -0.012
 8:  0.014         P  6  0.014
 9: -0.017         N  7 -0.017
10: -0.007         N  7 -0.024
11:  0.002         P  8  0.002
12:  0.002         P  8  0.004
13: -0.004         N  9 -0.004
14:  0.015         P 10  0.015
15:  0.002         P 10  0.017
16: -0.001         N 11 -0.001
17: -0.008         N 11 -0.009
18:  0.010         P 12  0.010
19: -0.018         N 13 -0.018
20:  0.046         P 14  0.046
MKa
  • 2,248
  • 16
  • 22
4

Throwing my [r] answer in the hat, optimized for speed and works with any length of x (unlike the asker's which was hard coded for length 20):

### data 
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)

### solution
summation <- c(x[1])
enn <- 1
n_of_seq <- c(enn)
for(i in 2:length(x)){
  first <- x[i]
  second <- summation[i - 1]

  if(sign(first) == sign(second)){
    summation <- c(summation, first + second)
    enn <- enn + 1
  }else{
    summation <- c(summation, first)
    enn <- 1

  }
  n_of_seq <- c(n_of_seq, enn)
  }

And, to compare run times on my current (very slow) work computer, here's the output of my microbenchmark using all of the R solutions in this thread. Unsurprisingly, the solutions making the most copies and conversions tended to be slower.

Unit: microseconds
         expr      min       lq       mean    median       uq      max neval
     my_way()   13.301   19.200   23.38352   21.4010   23.401  20604.0 1e+05
 author_way()   19.702   31.701   40.12371   36.0015   40.502  24393.9 1e+05
      ronak()  856.401 1113.601 1305.36419 1236.8010 1377.501 453191.4 1e+05
      ameer()  388.501  452.002  553.08263  491.3000  548.701 456156.6 1e+05
     andrew() 2007.801 2336.801 2748.57713 2518.1510 2760.302 463175.8 1e+05
      gonzo()   21.901   35.502   48.84946   43.9010   51.001  29519.5 1e+05

--------------EDIT-------------- It was pointed out by @nicola that my solution is not the fastest for longer lengths of x - which should be fairly obvious since I'm continually making copies of vectors by using calls like x <- c(x, y). I only created the fastest solution for lengths = 20 and just microbenchmarked as low as I could go for that.

To make a fairer comparison I edited all versions to generate the original code in the way I believe would be fastest, but I welcome feedback on that. Here is my full benchmarking code and results for my very slow system. I welcome any feedback.

# originally benchmarked a few different lengths
for(pie in c(100000)){


my_way<- function(){
  set.seed(100)
  x <- round(rnorm(pie, sd = 0.02), 3)
summation <- c(x[1])
enn <- 1
n_of_seq <- c(enn)
for(i in 2:length(x)){
  first <- x[i]
  second <- summation[i - 1]

  if(sign(first) == sign(second)){
    summation <- c(summation, first + second)
    enn <- enn + 1
  }else{
    summation <- c(summation, first)
    enn <- 1

  }
  n_of_seq <- c(n_of_seq, enn)
  }

# print(summation)
}




author_way <- function(){
  set.seed(100)
  x <- round(rnorm(pie, sd = 0.02), 3)

  sign_indicator <- ifelse(x > 0, 1,-1)
  sky <- length(x)
  number_of_sequence <- rep(NA, sky)
  n <- 1
  for (i in 2:sky) {
    if (sign_indicator[i] == sign_indicator[i - 1]) {
      n <- n + 1
    } else{
      n <- 1
    }
    number_of_sequence[i] <- n

  }
  number_of_sequence[1] <- 1

  #############################

  summation <- rep(NA, sky)

  for (i in 1:sky) {
    summation[i] <- sum(x[i:(i + 1 - number_of_sequence[i])])
  }
}


# other ppls solutions:




ronak <- function(){
df <- data.table('x' = round(rnorm(pie, sd = 0.02), 3))
df[, c("n_of_sequence", "sum") := list(seq_len(.N), cumsum(x)),rleid(sign(x))]
}



ameer <- function(){
  set.seed(100)
  x <- round(rnorm(pie, sd = 0.02), 3)
  run_lengths <- rle(sign(x))$lengths
  n_of_sequence <- run_lengths %>% map(seq) %>% unlist
  start <- cumsum(c(1,run_lengths))
  start <- start[-length(start)] # start points of each series 
  map2(start,run_lengths,~cumsum(x[.x:(.x+.y-1)])) %>% unlist()

}


count_and_sum <- function(x){
  set.seed(100)
  x <- round(rnorm(pie, sd = 0.02), 3)
  runs   <- rle((x > 0) * 1)$lengths
  groups <- split(x, rep(1:length(runs), runs))
  output <- function(group) data.frame(x = group, n = seq_along(group), sum = cumsum(group))
  result <- as.data.frame(do.call(rbind, lapply(groups, output)))
  `rownames<-`(result, 1:nrow(result))
}



andrew <- function(){
  set.seed(100)
  df <- tibble(x = round(rnorm(pie, sd = 0.02), 3)) %>% 
    mutate(seqno = cumsum(c(1, diff(sign(x)) != 0))) %>% #identify sequence ids
    group_by(seqno) %>%                                  #group by sequences
    mutate(n_of_sequence = row_number(),                 #count row numbers for each group
           sum = cumsum(x)) %>%                          #cumulative sum for each group
    ungroup() %>% 
    select(-seqno) 
}

gonzo <- function(){
  set.seed(100)
  x <- round(rnorm(pie, sd = 0.02), 3)
  n_of_sequence <- runner::streak_run(x > 0)
  sum <- runner::sum_run(x, k = n_of_sequence)
}



mi1 <- microbenchmark(my_way(), author_way(), ronak(), ameer(), andrew(), gonzo(), times = 10)
print(mi1)

}

As these results show, for other lengths than what I optimized for, my version is slow. The longer x is, the slower it gets up to ridiculously slow at everything above 1000. My favorite version is Ronak's which is only the second fastest on my system. GoGonzo is the fastest on my machine by far at these longer lengths.

Unit: milliseconds
         expr        min         lq        mean      median         uq        max neval
     my_way() 21276.9027 21428.2694 21604.30191 21581.97970 21806.9543 21896.7105    10
 author_way()    82.2465    83.0873    89.42343    84.78315    85.3638   115.4550    10
      ronak()    68.3922    69.3067    70.41924    69.84625    71.3509    74.7070    10
      ameer()   481.4566   509.7552   521.19034   514.77000   530.1121   579.4707    10
     andrew()   200.9654   202.1898   210.84914   206.20465   211.2006   233.7618    10
      gonzo()    27.3317    28.2550    28.66679    28.50535    28.9104    29.9549    10
  • Also the other answers work for any length and your benchmark must have some issue. With respect to the `data.table` @Ronak's solution, yours is order of magnitudes slower for a length of ~100000. – nicola Feb 21 '20 at 11:17
  • Thanks @nicola, I only said that the asker's solution worked for only 20 items not that any other solution didn't - they in fact do. I also optimized the speed for the length of 20 items so my claim for being the fastest ends there. For what it's worth, I liked Ronaks solution the best too but the author explicitly asked for more different ways of solving the problem. Ronak's is already faster for a length of 1000 as well. – Adverse_Event Feb 21 '20 at 13:56
  • And to expand on the microbenchmark. I recoded my benchmark so that every solution was creating (x) in the format they are using, so those that make tibbles generate x in the tibble call, same for data.table etc. I recoded the asker's original solution to work for any length (just saving the length of x in a variable and replacing the 20 with it). I then ran it for a length of 100.000 for 10 iterations. Note that my computer is suuuuper slow, it's running on a 5th gen Intel processor with DDR3 at 1600 MHz. I'm editing my post with those results. – Adverse_Event Feb 21 '20 at 14:06
2

In Python, apart from defining a class to store the memory variables, you can use a closure to achieve the same.

def run():
    """Build a counter function that remembers the current same-sign run.

    The returned function takes one number and returns its position within
    the current run. The run state lives in this closure.
    """
    count, last_sign = 0, 0

    def sign(i):
        if i > 0:
            return 1
        return -1

    def f(i):
        nonlocal count, last_sign
        current = sign(i)
        if current != last_sign:
            # Sign changed (or first call): remember it and restart the count
            last_sign, count = current, 1
        else:
            count += 1
        return count

    return f

f = run()
y = [f(i) for i in x]

Note this works for Python 3 only (in Python 2 I think you cannot modify the closure variable like this). Similar thing for summation as well.

Prodipta Ghosh
  • 509
  • 4
  • 14
2

Here's another base R approach:

data.frame(x,
           n = sequence(rle(sign(x))$lengths),
           sum = Reduce(function(x, y) if (sign(x) == sign(y)) x + y else y, x, accumulate = TRUE))

        x n    sum
1  -0.010 1 -0.010
2   0.003 1  0.003
3  -0.002 1 -0.002
4   0.018 1  0.018
5   0.002 2  0.020
6   0.006 3  0.026
7  -0.012 1 -0.012
8   0.014 1  0.014
9  -0.017 1 -0.017
10 -0.007 2 -0.024
11  0.002 1  0.002
12  0.002 2  0.004
13 -0.004 1 -0.004
14  0.015 1  0.015
15  0.002 2  0.017
16 -0.001 1 -0.001
17 -0.008 2 -0.009
18  0.010 1  0.010
19 -0.018 1 -0.018
20  0.046 1  0.046
Ritchie Sacramento
  • 29,890
  • 4
  • 48
  • 56
2

I think a loop would be easier to read, but just for fun, here's a solution in Python using recursion:

x = [-0.01, 0.003, -0.002, 0.018, 0.002, 0.006, -0.012, 0.014, -0.017, -0.007, 0.002, 0.002, -0.004, 0.015, 0.002,
     -0.001, -0.008, 0.01, -0.018, 0.046]


def sign(number):
    return 1 if number > 0 else -1


def sum_previous(pos, result=None):
    if not result:
        result = x[pos]
    else:
        result += x[pos]
    if pos == 0 or sign(x[pos]) != sign(x[pos-1]):
        return result
    else:
        return sum_previous(pos-1, result)


results = [sum_previous(i) for i in range(len(x))]
print(results)
RogB
  • 441
  • 1
  • 4
  • 14
2

A simple python answer, ignores the 0 case:

x = [-0.01, 0.003, -0.002, 0.018, 
     0.002, 0.006, -0.012, 0.014, 
     -0.017, -0.007, 0.002, 0.002, 
     -0.004, 0.015, 0.002, -0.001, 
     -0.008, 0.01, -0.018, 0.046]

count = 0
sign_positive = x[0] > 0
sign_count = []
for n in x:
    # the idea is to keep track of the sign and increment the 
    # count if it agrees with the current number we are looking at
    if (n > 0 and sign_positive) or (n < 0 and not sign_positive):
        count = count + 1
    # if it does not, the count goes back to 1
    else:
        count = 1
    # Whether we increased the count or not, we update whether the
    # sign was positive or negative
    sign_positive = n > 0
    sign_count.append(count)

# This is just to reproduce the output 
# (although I find the last repetition of the number unnecessary)    
results = list(zip(x, sign_count))
for i, result in enumerate(results):
    print(f"{i: >2d} {result[0]: .3f} {result[1]: >2d} {result[0]: .3f}")

 0 -0.010  1 -0.010
 1  0.003  1  0.003
 2 -0.002  1 -0.002
 3  0.018  1  0.018
 4  0.002  2  0.002
 5  0.006  3  0.006
 6 -0.012  1 -0.012
 7  0.014  1  0.014
 8 -0.017  1 -0.017
 9 -0.007  2 -0.007
10  0.002  1  0.002
11  0.002  2  0.002
12 -0.004  1 -0.004
13  0.015  1  0.015
14  0.002  2  0.002
15 -0.001  1 -0.001
16 -0.008  2 -0.008
17  0.010  1  0.010
18 -0.018  1 -0.018
19  0.046  1  0.046

A little more sophisticated solution, also takes care of the 0 case:

# To test the 0 case I am changing two numbers to 0
x = [-0.01, 0.003, -0.002, 0.018, 
     0.002, 0.006, -0.012, 0.014, 
    -0.017, -0.007, 0, 0, 
    -0.004, 0.015, 0.002, -0.001, 
    -0.008, 0.01, -0.018, 0.046]

# The rest is similar
count = 0
# This time we are using a nested ternary assignment 
# to account for the case of 0
# This would be more readable as a function, 
# but what it does is simple
# It returns None if n is 0, 
# True if it is larger than 0 
# and False if it less than 0
sign_positive = None if n == 0 else False if n < 0 else True
sign_count = []
for n in x:
    # We add the case of 0 by adding a third condition where
    # sign_positive was None (meaning the previous
    # number was 0) and the current number is 0.
    if (n > 0 and sign_positive) or \
       (n < 0 and not sign_positive) or \
       (n == 0 and sign_positive == None):
        count = count + 1
    else:
        count = 1
    sign_positive = None if n == 0 else False if n < 0 else True
    sign_count.append(count)
results = list(zip(x, sign_count))
for i, result in enumerate(results):
    print(f"{i: >2d} {result[0]: .3f} {result[1]: >2d} {result[0]: .3f}")

 0 -0.010  1 -0.010
 1  0.003  1  0.003
 2 -0.002  1 -0.002
 3  0.018  1  0.018
 4  0.002  2  0.002
 5  0.006  3  0.006
 6 -0.012  1 -0.012
 7  0.014  1  0.014
 8 -0.017  1 -0.017
 9 -0.007  2 -0.007
10  0.000  1  0.000
11  0.000  2  0.000
12 -0.004  3 -0.004
13  0.015  1  0.015
14  0.002  2  0.002
15 -0.001  1 -0.001
16 -0.008  2 -0.008
17  0.010  1  0.010
18 -0.018  1 -0.018
19  0.046  1  0.046
Sinan Kurmus
  • 585
  • 3
  • 11