Another data.table
option using if/else
. We convert the 'data.frame' to 'data.table' (setDT(df1)
), grouped by 'ID', 'YearMon', if
all
of the 'Count' values are 'NA' we return the Subset of Data.table (.SD
) or else
we get the index of maximum value of 'Count' and subset the data.table (.SD[which.max(Count)]
).
library(data.table)
setDT(df1)[, if(all(is.na(Count))) .SD else .SD[which.max(Count)],.(ID, YearMon)]
# ID YearMon Var Count
#1: 1 12007 D 2
#2: 1 22007 NA
#3: 1 32007 H 1
#4: 2 12007 H 1
#5: 2 22007 D 1
#6: 2 32007 NA
Or another option would be to concatenate the index from which.max
and the rows which have all 'NA' for 'Count' grouped by the variables, get the row index (.I
) and use that to subset the 'data.table'.
setDT(df1)[df1[, .I[c(which.max(Count), all(is.na(Count)))], .(ID, YearMon)]$V1]
# ID YearMon Var Count
#1: 1 12007 D 2
#2: 1 22007 NA
#3: 1 32007 H 1
#4: 2 12007 H 1
#5: 2 22007 D 1
#6: 2 32007 NA
Or we replace
the NA by a very small number, use which.max
and subset
setDT(df1)[, .SD[which.max(replace(Count, is.na(Count),-Inf ))], .(ID, YearMon)]
data
df1 <- structure(list(ID = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
YearMon = c(12007L,
12007L, 22007L, 32007L, 12007L, 22007L, 22007L, 32007L), Var = c("H",
"D", "", "H", "H", "", "D", ""), Count = c(1L, 2L, NA, 1L, 1L,
NA, 1L, NA)), .Names = c("ID", "YearMon", "Var", "Count"),
class = "data.frame", row.names = c(NA, -8L))