0

I have this data set

structure(list(ANO_REF = c(2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L), MES_REF = c(12L, 11L,
10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 6L, 12L, 11L, 11L, 10L,
9L, 9L, 9L), FLUXO = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), NPC = c(103920617L,
103920617L, 103920617L, 103920617L, 103920617L, 103920617L, 103920617L,
103920617L, 103920617L, 103920617L, 103920617L, 103920617L, 106320750L,
106320750L, 106320750L, 106320750L, 106320750L, 106320750L, 106320750L,
106320750L), PAIS_COD = c("ES", "ES", "ES", "ES", "ES", "ES",
"ES", "ES", "ES", "ES", "ES", "ES", "AT", "BE", "BE", "BE", "BE",
"BE", "BE", "BE"), NC8 = c(7051900L, 7051900L, 7051900L, 7051900L,
7051900L, 7051900L, 7051900L, 7051900L, 7051900L, 7051900L, 7051900L,
7051900L, 22042189L, 22042169L, 22042169L, 22042189L, 22042169L,
22042138L, 22042169L, 22042189L), VF = c(14297, 17326, 33461,
37192, 39201, 35816, 25126, 28974, 16166, 14058, 11517, 5381,
2526, 11664, 6401, 5135, 10, 540, 1188, 2676), ML = c(16000,
23000, 31750, 32000, 34500, 36750, 22250, 31500, 23160, 13250,
11000, 7250, 166.5, 1458, 315, 253.5, 7.5, 108, 162, 222), US = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 167, 1458, 315, 256, 8, 108,
162, 222), NPCPAISCOD = c("103920617ES", "103920617ES", "103920617ES",
"103920617ES", "103920617ES", "103920617ES", "103920617ES", "103920617ES",
"103920617ES", "103920617ES", "103920617ES", "103920617ES", "106320750AT",
"106320750BE", "106320750BE", "106320750BE", "106320750BE", "106320750BE",
"106320750BE", "106320750BE")), row.names = c("1508849", "1442666",
"1378683", "1314381", "1259396", "1195766", "309403", "246636",
"183714", "119203", "57345", "124", "1121677", "2312830", "2241638",
"2241639", "2169806", "2103631", "2103632", "2103633"), class = "data.frame")

and I want to create a new variable that counts the number of records for each NPCPAISCOD record.

This means from line 1 to 12 1,2, ..., 12 which have 103920617ES, line 13 is 1 (it is a single record) which have 103920617ES, line 14 is 1 (it is a single record) which have 106320750AT, from line 14 to 20 1,2, ..., 16 which have 106320750BE.

This means the new variable starts counting conditional to a change in NPCPAISCOD variable. When this one changes we have a new counting until the next change.

Thank you.

ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81
ramgouveia
  • 5
  • 1
  • 4

1 Answers1

0

here is a data.table approach

library( data.table )
setDT(mydata)
mydata[, new_var := rowid( NPCPAISCOD ) ]

output

#     ANO_REF MES_REF FLUXO       NPC PAIS_COD      NC8    VF      ML   US  NPCPAISCOD new_var
#  1:    2016      12     2 103920617       ES  7051900 14297 16000.0    0 103920617ES       1
#  2:    2016      11     2 103920617       ES  7051900 17326 23000.0    0 103920617ES       2
#  3:    2016      10     2 103920617       ES  7051900 33461 31750.0    0 103920617ES       3
#  4:    2016       9     2 103920617       ES  7051900 37192 32000.0    0 103920617ES       4
#  5:    2016       8     2 103920617       ES  7051900 39201 34500.0    0 103920617ES       5
#  6:    2016       7     2 103920617       ES  7051900 35816 36750.0    0 103920617ES       6
#  7:    2016       6     2 103920617       ES  7051900 25126 22250.0    0 103920617ES       7
#  8:    2016       5     2 103920617       ES  7051900 28974 31500.0    0 103920617ES       8
#  9:    2016       4     2 103920617       ES  7051900 16166 23160.0    0 103920617ES       9
# 10:    2016       3     2 103920617       ES  7051900 14058 13250.0    0 103920617ES      10
# 11:    2016       2     2 103920617       ES  7051900 11517 11000.0    0 103920617ES      11
# 12:    2016       1     2 103920617       ES  7051900  5381  7250.0    0 103920617ES      12
# 13:    2018       6     2 106320750       AT 22042189  2526   166.5  167 106320750AT       1
# 14:    2018      12     2 106320750       BE 22042169 11664  1458.0 1458 106320750BE       1
# 15:    2018      11     2 106320750       BE 22042169  6401   315.0  315 106320750BE       2
# 16:    2018      11     2 106320750       BE 22042189  5135   253.5  256 106320750BE       3
# 17:    2018      10     2 106320750       BE 22042169    10     7.5    8 106320750BE       4
# 18:    2018       9     2 106320750       BE 22042138   540   108.0  108 106320750BE       5
# 19:    2018       9     2 106320750       BE 22042169  1188   162.0  162 106320750BE       6
# 20:    2018       9     2 106320750       BE 22042189  2676   222.0  222 106320750BE       7
Wimpel
  • 26,031
  • 1
  • 20
  • 37