0

I am trying to give ranknumber according to qtr wise here is the inpt and output

INPUT:-

year   qtr  month   day          salry  
2001    q1  jan     1           1000.00         
2001    q1  jan     2           1500.00         
2001    q1  feb     1           3000.00               
2001    q1  feb     2           2100.00     
2001    q1  march   1          1000.00    
2001    q1  march   2          1200.00      
2001    q2  april   1          1300.00    
2001    q2  april   2          1900.00      
2001    q2  may     1          1000.00      
2001    q2  may     2          1800.00    
2001    q2  june    1          1600.00    
2001    q2  june    2          1900.00    
2001    q3  july    1          2000.00    
2001    q3  july    2          1800.00    
2001    q3  august  1          2100.00    
2001    q3  august  2          1500.00    
2001    q3  sept    1          1700.00    
2001    q3  sept    2          1400.00    
2001    q4  oct     1          1300.00    
2001    q4  oct     2          1200.00        
2001    q4  nov     1          1100.00    
2001    q4  nov     2          1700.00    
2001    q4  dec     1          1000.00      
2001    q4  dec     2          1700.00  

OUTPUT:-

year    qtr     month  day  salry      rank  
2001    q1      feb     1   3000.00     1  
2001    q1      feb     2   2100.00     2  
2001    q1      jan     2   1500.00     3  
2001    q1      march   2   1200.00     4  
2001    q1      jan     1   1000.00     5  
2001    q1      march   1   1000.00     6    
2001    q2      april   2   1900.00     1  
2001    q2      june    2   1900.00     2  
2001    q2      may     2   1800.00     3  
2001    q2      june    1   1600.00     4  
2001    q2      april   1   1300.00     5  
2001    q2      may     1   1000.00     6  
2001    q3      august  1   2100.00     1  
2001    q3      july    1   2000.00     2  
2001    q3      july    2   1800.00     3  
2001    q3      sept    1   1700.00     4  
2001    q3      august  2   1500.00     5  
2001    q3      sept    2   1400.00     6  
2001    q4      nov     2   1700.00     1  
2001    q4      dec     2   1700.00     2  
2001    q4      oct     1   1300.00     3  
2001    q4      oct     2   1200.00     4    
2001    q4      nov     1   1100.00     5    
2001    q4      dec     1   1000.00     6 
jdobres
  • 11,339
  • 1
  • 17
  • 37
murali
  • 41
  • 6
  • 1
    `library(tidyverse); your_data %>% group_by(year, qtr) %>% mutate(rank = row_number())` – jdobres Feb 09 '22 at 18:28
  • `input %>% group_by(year, qtr) %>% mutate(rank = rank(desc(salary), ties.method = "min")) %>% arrange(year, qtr, rank)` – rdelrossi Feb 09 '22 at 18:39
  • @akrun thanks for finding the dupe, I knew it was somewhere ... it was quicker for me to put in a quick answer than to search for it (I knew it was there somewhere), I figured somebody would know the dupe better than I did. – r2evans Feb 09 '22 at 19:00

3 Answers3

3

dplyr

library(dplyr)
dat %>%
  group_by(year, qtr) %>%
  mutate(rank2 = row_number()) %>%
  ungroup()
# # A tibble: 24 x 7
#     year qtr   month   day salry  rank rank2
#    <int> <chr> <chr> <int> <dbl> <int> <int>
#  1  2001 q1    feb       1  3000     1     1
#  2  2001 q1    feb       2  2100     2     2
#  3  2001 q1    jan       2  1500     3     3
#  4  2001 q1    march     2  1200     4     4
#  5  2001 q1    jan       1  1000     5     5
#  6  2001 q1    march     1  1000     6     6
#  7  2001 q2    april     2  1900     1     1
#  8  2001 q2    june      2  1900     2     2
#  9  2001 q2    may       2  1800     3     3
# 10  2001 q2    june      1  1600     4     4
# # ... with 14 more rows

base R

dat$rank3 <- ave(seq_len(nrow(dat)), dat[c("year","qtr")], FUN = seq_along)
head(dat, 10)
#    year qtr month day salry rank rank3
# 1  2001  q1   feb   1  3000    1     1
# 2  2001  q1   feb   2  2100    2     2
# 3  2001  q1   jan   2  1500    3     3
# 4  2001  q1 march   2  1200    4     4
# 5  2001  q1   jan   1  1000    5     5
# 6  2001  q1 march   1  1000    6     6
# 7  2001  q2 april   2  1900    1     1
# 8  2001  q2  june   2  1900    2     2
# 9  2001  q2   may   2  1800    3     3
# 10 2001  q2  june   1  1600    4     4
r2evans
  • 141,215
  • 6
  • 77
  • 149
  • noticed a small syntax error: `dat[, c("year", "qtr")]` – rg4s Feb 10 '22 at 07:30
  • @rg4s, where's the error? The omission of a comma is perfectly fine with `data.frame` and `tbl_df`, the only time this is a problem is if `dat` is a `data.table` ... which, if it is, the user never said as such. – r2evans Feb 10 '22 at 14:55
1

This should work using tidyverse packages

library(tidyverse)

dat <- tibble::tribble(~year,~qtr, ~month, ~salry,
                       2001, 'q1', 'jan 1', 1000.00,
                       2001, 'q1', 'jan 2', 1500.00,
                       2001, 'q1', 'feb 1', 3000.00,
                       2001, 'q1', 'feb 2', 2100.00,
                       2001, 'q1', 'march 1', 1000.00,
                       2001, 'q1', 'march 2', 1200.00,
                       2001, 'q2', 'april 1', 1300.00,
                       2001, 'q2', 'april 2', 1900.00,
                       2001, 'q2', 'may 1', 1000.00,
                       2001, 'q2', 'may 2', 1800.00,
                       2001, 'q2', 'june 1', 1600.00,
                       2001, 'q2', 'june 2', 1900.00,
                       2001, 'q3', 'july 1', 2000.00,
                       2001, 'q3', 'july 2', 1800.00,
                       2001, 'q3', 'august 1', 2100.00,
                       2001, 'q3', 'august 2', 1500.00,
                       2001, 'q3', 'sept 1', 1700.00,
                       2001, 'q3', 'sept 2', 1400.00,
                       2001, 'q4', 'oct 1', 1300.00,
                       2001, 'q4', 'oct 2', 1200.00,
                       2001, 'q4', 'nov 1', 1100.00,
                       2001, 'q4', 'nov 2', 1700.00,
                       2001, 'q4', 'dec 1', 1000.00,
                       2001, 'q4', 'dec 2', 1700.00
) %>% 
  dplyr::group_by(qtr) %>% 
  dplyr::arrange(qtr, salry) %>% 
  dplyr::mutate(rank = dplyr::row_number())
MattO
  • 171
  • 1
  • 6
1

Added microbenchmarks results!

I love dplyr solution by r2evans!

But here's data.table solution:

library(data.table)

data <- data.frame(
  
  year = rep(2001, 24),
  qrt = rep(paste0("q", 1:4), each = 6),
  month = sample(c("jan", "jan", "feb", "march",
                   "apr", "june", "june", "july", 
                   "aug", "sept", "oct", "jan",
                   "jan", "feb", "march",
                   "apr", "june", "june", "july", 
                   "aug", "sept", "oct", "july", "aug"))
  
)

setDT(data)

data[, row_n := .I, by = .(qrt, month)]

Output:

    year qrt month row_n
 1: 2001  q1   jan     1
 2: 2001  q1   apr     2
 3: 2001  q1  june     3
 4: 2001  q1  sept     4
 5: 2001  q1   jan     5
 6: 2001  q1   oct     6
         ...
20: 2001  q4  july    20
21: 2001  q4  june    21
22: 2001  q4  july    22
23: 2001  q4 march    23
24: 2001  q4   aug    24

If you need to arrange any of your columns before further manipulations, you can use setorder function:

setorder(data, qrt, month)
data[, row_n := .I, by = .(qrt, month)]

Then the output will be the following:

    year qrt month row_n
 1: 2001  q1   aug     1
 2: 2001  q1  july     2
 3: 2001  q1  july     3
 4: 2001  q1  june     4
 5: 2001  q1  sept     5
         ...
19: 2001  q4   aug    19
20: 2001  q4   jan    20
21: 2001  q4  june    21
22: 2001  q4  june    22
23: 2001  q4 march    23
24: 2001  q4   oct    24

Benchmarks

Base R function wins! However, it looks not as nice as dplyr or data.table solution.

mbm <- microbenchmark::microbenchmark(
  
  base_R = {
    
    data$rank <- ave(seq_len(nrow(data)), data[, c("year","qrt")], FUN = seq_along)
    
  },
  
  r2evans = {
    
    library(dplyr)
    data %>%
      group_by(year, qrt) %>%
      mutate(rank = row_number()) %>%
      ungroup()
  
},

  rg4s = {
    
    library(data.table)
    setDT(data)
    data[, row_n := .I, by = .(qrt, month)]
},

times = 100)

enter image description here

rg4s
  • 811
  • 5
  • 22