1

The objective is to populate a new column (df$final.count) according to multiple conditions. An example data frame below:

# Example data frame in dput() form: 10 rows with columns item, raw.count,
# loc, side, recount, final.count and EXPECTED.
# - loc and side are factors whose "NA" entries are the literal level "NA"
#   (a string), not a missing value, so is.na() will not flag them.
# - recount is numeric and contains real NA values.
# - final.count is entirely NA_real_ here; EXPECTED presumably holds the
#   values final.count should end up with -- confirm against the rules below.
# NOTE: the expression is not assigned to a name here; the code further down
# refers to the data as `df`, so assign it (df <- structure(...)) before use.
structure(list(item = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L), .Label = c("a", "b"), class = "factor"), raw.count = c(16, 
300, 203, 6, 5, 40, 20, 16, 300, 203), loc = structure(c(4L, 
2L, 2L, 2L, 2L, 3L, 3L, 4L, 2L, 3L), .Label = c("  ", "in", "out", 
"NA"), class = "factor"), side = structure(c(4L, 2L, 3L, 2L, 
3L, 4L, 3L, 4L, 2L, 4L), .Label = c("F", "L", "R", "NA"), class = "factor"), 
    recount = c(15, NA, NA, 7, NA, NA, 16, 15, NA, NA), final.count = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), EXPECTED = c(15, 60, 120, 
    7, 5, 40, 16, 15, 300, 203)), row.names = c(NA, 10L), class = "data.frame")

The objective is to populate a new column (df$final.count) according to the following conditions affecting multiple columns:

  1. if there is a number in df$recount, THEN df$recount should be used in df$final.count regardless of the other column values
  2. if there is no number (NA) in df$recount AND df$raw.count > 10 AND df$loc is "in" AND df$side is "L", THEN 0.2*df$raw.count should be used to populate df$final.count
  3. if there is no number (NA) in df$recount AND df$raw.count > 10 AND df$loc is "in" AND df$side is "R", THEN 0.6*df$raw.count should be used to populate df$final.count (NOTE: only side is different)
  4. if df$raw.count <= 10, then df$raw.count should be used, except if 1 above holds
  5. if df$loc is "out", then df$final.count <- df$raw.count regardless of the other column values, except if 1 above holds

I have tried various versions of if / else if in a loop, for example:

  # Populate df$final.count one row at a time. The branches are tested in
  # priority order and the first one that matches decides the value; a row
  # that matches none of them keeps whatever final.count it already had.
  # seq_len() is used instead of 1:nrow(df) so a zero-row df skips the loop.
  for (i in seq_len(nrow(df))) {
    if (!is.na(df$recount[i])) {
      # Rule 1: an available recount always wins.
      df$final.count[i] <- df$recount[i]
    } else if (df$item[i] == "a" && df$raw.count[i] > 10 &&
               df$loc[i] == "in" && df$side[i] == "L") {
      # Rule 2: large "in"/"L" counts are scaled by 0.2.
      df$final.count[i] <- 0.2 * df$raw.count[i]
    } else if (df$item[i] == "a" && df$raw.count[i] > 10 &&
               df$loc[i] == "in" && df$side[i] == "R") {
      # Rule 3: same as rule 2 but side "R" is scaled by 0.6.
      df$final.count[i] <- 0.6 * df$raw.count[i]
    } else if (df$raw.count[i] <= 10) {
      # Rule 4: small counts are taken as they are.
      df$final.count[i] <- df$raw.count[i]
    } else if (df$loc[i] == "out") {
      # Rule 5: "out" rows use the raw count.
      df$final.count[i] <- df$raw.count[i]
    }
  }
doncarlos
  • 401
  • 4
  • 16
  • I am not sure if your logic is correct but you are missing `i` in all your `df$final.count` and also in `raw.count` in last two conditions. – Ronak Shah Dec 05 '18 at 07:40

1 Answer

2

If you use case_when() from the dplyr package, it becomes more readable. You can also lose the for loop.

library(dplyr)

# Vectorised replacement for the row-by-row loop. case_when() evaluates the
# conditions top to bottom for every row and uses the first one that is TRUE.
# The result is written to final.count (the column the question asks for) and
# assigned back to df so the change is kept rather than only printed.
df <- df %>%
  mutate(final.count = case_when(
    # a recount, when present, overrides everything else
    !is.na(recount) ~ recount,
    item == "a" & raw.count > 10 & loc == "in" & side == "L" ~ 0.2 * raw.count,
    item == "a" & raw.count > 10 & loc == "in" & side == "R" ~ 0.6 * raw.count,
    raw.count <= 10 ~ raw.count,
    loc == "out" ~ raw.count,
    # fallback for rows matching no rule; a numeric NA keeps every branch
    # of case_when() the same type
    TRUE ~ as.numeric(NA)
  ))
Wimpel
  • 26,031
  • 1
  • 20
  • 37
  • 1
    A very readable solution and it solved my question with a few tweaks. Also had to change last line to TRUE ~ as.numeric(NA) following https://stackoverflow.com/questions/44893933/avoiding-type-conflicts-with-dplyrcase-when – doncarlos Dec 05 '18 at 08:13