0

I have a table

ID         RATES
 1          0.01
 1            0
 1            0
 1            0
 2          0.05
 2          0.05
 2          0.01
 2            0
 3            0
 3            0
 3            0

Ideally, I want to create a new column called n_rates which replace the 0 values in rate column by group (ID) and conditions.

Condition 1 - If at least one rate IN (0.01, 0.015, 0.05) for this ID THEN replace all 0 values by the most rate values (e.g. if for this ID, 0.01 appears more than 0.05 then replace 0 with 0.01. Only rates == 0 rows can be replaced)

Condition 2 - If rates NOT IN (0.01, 0.015, 0.05) then make no changes to the rows
NightDog
  • 91
  • 7

3 Answers3

1

You just need to find the mode , the most frequency value for each group , I am using dplyr group_by here

function from Ken

Mode <- function(x) {
    ux <- unique(x)
    ux[which.max(tabulate(match(x, ux)))]
}

library(dplyr)
df1=dt[dt$RATES%in%c(0.05,0.01),]%>%group_by(ID)%>%summarise(Value=Mode(RATES)) 

dt=merge(dt,df1,on='ID',all.x=T)# merge back the result to original dt    
dt$RATES[dt$RATES==0]=dt$Value[dt$RATES==0] # assign the value only if the RATES equal to 0

dt$RATES[is.na(dt$RATES)]=0 # fill NA back to 0 
dt$Value=NULL# drop the helper column

Result

dt

   ID RATES
1   1  0.01
2   1  0.01
3   1  0.01
4   1  0.01
5   2  0.05
6   2  0.05
7   2  0.05
8   2  0.01
9   3  0.00
10  3  0.00
11  3  0.00
BENY
  • 317,841
  • 20
  • 164
  • 234
1

For a one-liner data.table answer, and using Ken's function:

Mode <- function(x) {
    ux <- unique(x)
    ux[which.max(tabulate(match(x, ux)))]
}

library(data.table)
setDT(df)[, Rates := ifelse(Rates==0 & any(Rates!=0), 
                             Mode(Rates[Rates!=0]), Rates), by = ID]
df

#ID Rates
#1  0.01
#1  0.01
#1  0.01
#1  0.01
#2  0.05
#2  0.05
#2  0.01
#2  0.05
#3  0.00
#3  0.00
#3  0.00
Yannis Vassiliadis
  • 1,719
  • 8
  • 14
0

Here's a function called by %>% do(...)

myfun <- function(df) {
    targets <- c(0.01, 0.015, 0.05)
    if (any(unique(df$RATES) %in% targets)) {
        val <- as.numeric(names(head(sort(-table(df$RATES[df$RATES > 0])), 1)))
        df %>%
                mutate(RATES = ifelse(RATES==0, val, RATES))
    } else {
        df
    }
}

library(dplyr)
df %>%
  group_by(ID) %>%
  do(myfun(.))

# A tibble: 11 x 2
# Groups: ID [3]
      # ID  RATES
   # <int>  <dbl>
 # 1     1 0.0100
 # 2     1 0.0100
 # 3     1 0.0100
 # 4     1 0.0100
 # 5     2 0.0500
 # 6     2 0.0500
 # 7     2 0.0100
 # 8     2 0.0500
 # 9     3 0.    
# 10     3 0.    
# 11     3 0.

Data

df <- read.table(text="ID         RATES
 1          0.01
 1            0
 1            0
 1            0
 2          0.05
 2          0.05
 2          0.01
 2            0
 3            0
 3            0
 3            0", header=TRUE)
CPak
  • 13,260
  • 3
  • 30
  • 48