2

This question is a continuation of this one.

Given a data.table, I would like to extract cumulative unique elements until it reachs three unique values OR when triggered by t , than reset and resume:

y <- data.table(  a = c (1, 2, 2, 3, 3, 4, 3, 2, 2, 5, 6, 7, 9, 8)
                , t = c (F, F, F, F, F, F, F, T, F, F, F, F, F, F))

The derired output is:

a     t output
1 FALSE      1
2 FALSE    1 2
2 FALSE    1 2
3 FALSE  1 2 3
3 FALSE  1 2 3
4 FALSE      4    # 4 is the forth element, so it resets and start again
3 FALSE    3 4
2  TRUE      2    # because `t` is `TRUE` it resets and start again 
2 FALSE      2
5 FALSE    2 5
6 FALSE  2 5 6
7 FALSE      7   # 7 is the forth element, so it resets and start again
9 FALSE    7 8
8 FALSE  7 8 9

Based on "thelatemail" solution in the link, I tried the following function:

unionlim_trigger <- function(x,y,n=4, trigger = FALSE) {
  u <- union(x,y)
  if(length(u) == n | trigger == TRUE) y else u
}

However, when I apply through:

y[, out := sapply(Reduce(function(x,y,trigger) unionlim_trigger(x=x, y = y, trigger = t), a, accumulate=TRUE), paste, collapse=" ")]

I get the warnings:

In if (length(u) == n | trigger == T) y else u :
the condition has length > 1 and only the first element will be used

I understand that this happens because instead of passing the i-th element of t, I am passing the whole vector.

How do I solve that? I tried using mapply and the instruction below, with no success:

y[, out := sapply(Reduce(function(x,y,trigger) unionlim_trigger(x=x, y = y, trigger = t[.I]), a, accumulate=TRUE), paste, collapse=" ")]
thelatemail
  • 91,185
  • 12
  • 128
  • 188
Fabio Correa
  • 1,257
  • 1
  • 11
  • 17

3 Answers3

2

Here is one approach :

library(data.table)

#Create a group column for every reset

y[, group := cumsum(t)]

#Function
unionlim_trigger <- function(x,y,n=4) {
  u <- union(x,y)
  if(length(u) >= n) y else sort(u)
}

#appply it for each group
y[, output := sapply(Reduce(unionlim_trigger, a, accumulate = TRUE), 
              paste0, collapse = ' '), group]
y

#    a     t group output
# 1: 1 FALSE     0      1
# 2: 2 FALSE     0    1 2
# 3: 2 FALSE     0    1 2
# 4: 3 FALSE     0  1 2 3
# 5: 3 FALSE     0  1 2 3
# 6: 4 FALSE     0      4
# 7: 3 FALSE     0    3 4
# 8: 2  TRUE     1      2
# 9: 2 FALSE     1      2
#10: 5 FALSE     1    2 5
#11: 6 FALSE     1  2 5 6
#12: 7 FALSE     1      7
#13: 9 FALSE     1    7 9
#14: 8 FALSE     1  7 8 9
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
2

A bit of adjustment is required to the Reduce function and the subsequent passing through of multiple arguments.

unionlim <- function(x, y, t, n=4) {
  u <- union(x,y)
  if(length(u) == n | t) y else sort(u)
}

y[, 
  out := sapply(Reduce(
    function(x, args) unionlim(x, args[1], args[2]), 
    Map(c, y$a[-1], y$t[-1]), init = y$a[1], accumulate=TRUE
  ), paste, collapse=" ")
]

#    a     t   out
# 1: 1 FALSE     1
# 2: 2 FALSE   1 2
# 3: 2 FALSE   1 2
# 4: 3 FALSE 1 2 3
# 5: 3 FALSE 1 2 3
# 6: 4 FALSE     4
# 7: 3 FALSE   3 4
# 8: 2  TRUE     2
# 9: 2 FALSE     2
#10: 5 FALSE   2 5
#11: 6 FALSE 2 5 6
#12: 7 FALSE     7
#13: 9 FALSE   7 9
#14: 8 FALSE 7 8 9
thelatemail
  • 91,185
  • 12
  • 128
  • 188
1

Another way is:

y[, output := Reduce(function(x, y)
  if(grepl(sprintf("\\b%d\\b",y), x)) x 
  else if (stringr::str_count(x, ",")==2) y
  else paste(x, y, sep=","), a, accumulate = TRUE),
  by = cumsum(t)]

y
    a     t output
 1: 1 FALSE      1
 2: 2 FALSE    1,2
 3: 2 FALSE    1,2
 4: 3 FALSE  1,2,3
 5: 3 FALSE  1,2,3
 6: 4 FALSE      4
 7: 3 FALSE    4,3
 8: 2  TRUE      2
 9: 2 FALSE      2
10: 5 FALSE    2,5
11: 6 FALSE  2,5,6
12: 7 FALSE      7
13: 9 FALSE    7,9
14: 8 FALSE  7,9,8
Onyambu
  • 67,392
  • 3
  • 24
  • 53