4

I am trying to create an additional variable, `flag`, that numbers the repeated observations of each value of `id`, starting from 0.

# Example data: each id value repeated as many times as it occurs in the question.
dataset <- data.frame(
  id = rep(c(1, 2, 4, 6, 7, 8), times = c(3, 2, 1, 3, 4, 1))
)

The intended result looks like this:

id   flag
1     0
1     1
1     2
2     0
2     1
4     0
6     0
6     1
6     2
7     0
7     1
7     2
7     3
8     0

Thank You!

Park
  • 14,771
  • 6
  • 10
  • 29
zas
  • 85
  • 2

5 Answers

2

You may try

# Number each row within its run of consecutive equal ids, starting at 0.
# - rle() stores the run lengths in `lengths`; the name is spelled out in full
#   rather than relying on `$` partial matching of `length`.
# - lapply() always returns a list, so unlist() always yields a plain vector.
#   sapply() simplifies to a matrix when every run has the same length, and
#   assigning that matrix to the column then fails on the row count.
dataset$flag <- unlist(
  lapply(rle(dataset$id)$lengths, function(n) seq_len(n) - 1)
)

   id flag
1   1    0
2   1    1
3   1    2
4   2    0
5   2    1
6   4    0
7   6    0
8   6    1
9   6    2
10  7    0
11  7    1
12  7    2
13  7    3
14  8    0
Park
  • 14,771
  • 6
  • 10
  • 29
1

data.table:

library(data.table)
# Turn `dataset` into a data.table, then add `flag` in place: the 0-based
# position of each row among the rows that share its id.
setDT(dataset)
dataset[, flag := seq_len(.N) - 1, by = id]
dataset
    id flag
 1:  1    0
 2:  1    1
 3:  1    2
 4:  2    0
 5:  2    1
 6:  4    0
 7:  6    0
 8:  6    1
 9:  6    2
10:  7    0
11:  7    1
12:  7    2
13:  7    3
14:  8    0

Base R:

# sequence() expands each run length n reported by rle() into 1..n;
# subtracting 1 makes the numbering start at 0.
dataset[["flag"]] <- sequence(rle(dataset[["id"]])[["lengths"]]) - 1
dataset
   id flag
1   1    0
2   1    1
3   1    2
4   2    0
5   2    1
6   4    0
7   6    0
8   6    1
9   6    2
10  7    0
11  7    1
12  7    2
13  7    3
14  8    0
Onyambu
  • 67,392
  • 3
  • 24
  • 53
1

Another base option:

# Running counter over duplicated(id): a repeated id (TRUE) increments the
# previous count, a first occurrence (FALSE) resets it to 0.
transform(
  dataset,
  flag = Reduce(
    function(count, is_dup) is_dup * (count + 1L),
    duplicated(id),
    accumulate = TRUE
  )
)

   id flag
1   1    0
2   1    1
3   1    2
4   2    0
5   2    1
6   4    0
7   6    0
8   6    1
9   6    2
10  7    0
11  7    1
12  7    2
13  7    3
14  8    0
Ritchie Sacramento
  • 29,890
  • 4
  • 48
  • 56
1

dplyr -

library(dplyr)

# Number the rows within each id, starting at 0. ungroup() drops the grouping
# afterwards so later verbs applied to the result are not silently computed
# per id.
dataset %>%
  group_by(id) %>%
  mutate(flag = row_number() - 1) %>%
  ungroup()

#      id  flag
#   <dbl> <dbl>
# 1     1     0
# 2     1     1
# 3     1     2
# 4     2     0
# 5     2     1
# 6     4     0
# 7     6     0
# 8     6     1
# 9     6     2
#10     7     0
#11     7     1
#12     7     2
#13     7     3
#14     8     0

Base R with similar logic

# ave() applies the function to the id values of each id group and puts the
# results back in the original row order; subtracting 1 starts the count at 0.
transform(
  dataset,
  flag = ave(id, id, FUN = function(group) seq_along(group)) - 1
)
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
0

Another way to get the expected result, although it takes a little more code:

# Count how many rows each id has (one summary row per id).
x <- dataset %>%
  group_by(id) %>%
  summarise(nreg = n())

# Build one small data frame per id, numbering its rows 0, 1, ..., nreg - 1.
# seq_len() copes with an empty `x` (1:nrow(x) would iterate over c(1, 0)),
# and collecting the pieces in a list avoids growing `df` with rbind() on
# every iteration.
pieces <- lapply(seq_len(nrow(x)), function(i) {
  data.frame(
    id = rep(x$id[i], x$nreg[i]),
    flag = seq_len(x$nreg[i]) - 1L
  )
})

# Bind everything once; the leading empty data frame keeps `df` a data.frame
# even when there are no ids at all.
df <- do.call(rbind, c(list(data.frame()), pieces))
matheus
  • 11
  • 4