1
ID <- c("A", "A", "A", "B", "B", "c")
Value <- c("blue", "blue", "green", "red", "orange", NA)
df <- tibble(ID, Value)

I have to group by the ID, keep the value of Value which is the most repeated If the values are equal (ID == "B"), I choose the first value

The value variable should look like :

Value_output <- c("blue", "blue", "blue", "red", "red", NA)
camille
  • 16,432
  • 18
  • 38
  • 60
Mostafa90
  • 1,674
  • 1
  • 21
  • 39
  • 1
    Take a look at [this post](https://stackoverflow.com/a/8189441/8583393) and then do `... %>% mutate(Value = Mode(Value))` – markus Jul 17 '18 at 14:46

3 Answers3

2

Solution with data.table package (count Value by ID).

ID <- c("A", "A", "A", "B", "B", "c")
Value <- c("blue", "blue", "green", "red", "orange", NA)

library(data.table)
foo <- data.table(ID, Value)
setkey(foo, ID)
foo[foo[, .N, .(ID, Value)][order(N, decreasing = TRUE)][, .(Value = Value[1]), ID]]$i.Value
[1] "blue" "blue" "blue" "red"  "red"  NA    
pogibas
  • 27,303
  • 19
  • 84
  • 117
2

We can get the Mode by group

library(dplyr)
df %>%
   group_by(ID) %>%
   arrange(ID, is.na(Value)) %>% # in case to keep non- NA elements for a tie
   mutate(Value_output = Mode(Value))

where

 Mode <- function(x) {
   ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
 }

data

ID <- c("A", "A", "A", "B", "B", "c", "c")
Value <- c("blue", "blue", "green", "red", "orange", NA, "yellow")
df <- tibble(ID, Value)
akrun
  • 874,273
  • 37
  • 540
  • 662
2

Using base R:

lot <- aggregate(
  Value ~ ID, 
  df, 
  function(x) names(sort(table(x), decreasing=TRUE))[1]
)
df$Value <- lot[match(df$ID, lot$ID), "Value"]
df
  ID    Value 
  <chr> <chr> 
1 A     blue  
2 A     blue  
3 A     blue  
4 B     orange
5 B     orange
6 c     NA   
s_baldur
  • 29,441
  • 4
  • 36
  • 69