1

If I have the following data:

library(dplyr)

# Example data: the block A, A, A, B, B, B repeated twice (12 rows).
tibble(
  id = rep(rep(c("A", "B"), each = 3), times = 2)
)
#> # A tibble: 12 x 1
#>    id   
#>    <chr>
#>  1 A    
#>  2 A    
#>  3 A    
#>  4 B    
#>  5 B    
#>  6 B    
#>  7 A    
#>  8 A    
#>  9 A    
#> 10 B    
#> 11 B    
#> 12 B

If I want to count the sequence of rows by group, I would normally do the following:

# Number the rows within each `id` group. Because grouping is by value only,
# the counter keeps running when the same id reappears later in the data.
tibble(
  id = rep(rep(c("A", "B"), each = 3), times = 2)
) %>%
  group_by(id) %>%
  mutate(sequence_group = row_number())
#> # A tibble: 12 x 2
#> # Groups:   id [2]
#>    id    sequence_group
#>    <chr>          <int>
#>  1 A                  1
#>  2 A                  2
#>  3 A                  3
#>  4 B                  1
#>  5 B                  2
#>  6 B                  3
#>  7 A                  4
#>  8 A                  5
#>  9 A                  6
#> 10 B                  4
#> 11 B                  5
#> 12 B                  6

However, I would like the count to restart every time the group changes. This is the expected output:

#> # A tibble: 12 x 2
#>    id    sequence_group
#>    <chr>          <int>
#>  1 A                  1
#>  2 A                  2
#>  3 A                  3
#>  4 B                  1
#>  5 B                  2
#>  6 B                  3
#>  7 A                  1
#>  8 A                  2
#>  9 A                  3
#> 10 B                  1
#> 11 B                  2
#> 12 B                  3

Any suggestions?

FMM
  • 1,857
  • 1
  • 15
  • 38

2 Answers

1

Does this work?

library(dplyr)
# Label each run of consecutive identical `id` values (rle() is evaluated once
# and its `lengths` component is read via with()), number the rows inside each
# run, then drop the helper column.
df %>%
  mutate(rids = with(rle(id), rep(seq_along(lengths), times = lengths))) %>%
  group_by(rids) %>%
  mutate(sequence_group = row_number()) %>%
  ungroup() %>%
  select(-rids)
# A tibble: 12 x 2
   id    sequence_group
   <chr>          <int>
 1 A                  1
 2 A                  2
 3 A                  3
 4 B                  1
 5 B                  2
 6 B                  3
 7 A                  1
 8 A                  2
 9 A                  3
10 B                  1
11 B                  2
12 B                  3
Karthik S
  • 11,348
  • 2
  • 11
  • 25
1

Using data.table helper functions:

library(data.table)
# rleid() gives every run of consecutive equal ids its own label;
# rowid() then counts 1, 2, 3, ... within each label.
df[["sequence_group"]] <- rowid(rleid(df[["id"]]))
df
#    id    sequence_group
#    <chr>          <int>
#  1 A                  1
#  2 A                  2
#  3 A                  3
#  4 B                  1
#  5 B                  2
#  6 B                  3
#  7 A                  1
#  8 A                  2
#  9 A                  3
# 10 B                  1
# 11 B                  2
# 12 B                  3

More similar to your dplyr workflow would be:

# Group by a temporary run label so the counter restarts whenever `id`
# changes, then remove the label again.
df %>%
  group_by(tmp = rleid(id)) %>%
  mutate(sequence_group = row_number()) %>%
  ungroup() %>%
  select(-tmp)
# Or simply
df <- mutate(df, sequence_group = rowid(rleid(id)))

Finally, using just base R:

# rle() gives the length of each run of consecutive equal ids; sequence()
# expands those lengths into 1..n counters, one after another.
df$sequence_group <- sequence(rle(df$id)$lengths)
s_baldur
  • 29,441
  • 4
  • 36
  • 69