One very fast solution (especially for big data sets) could be to use data.table
library(data.table)
setDT(df)[, list(SumNAs = sum(is.na(.SD))), by = a]
#          a SumNAs
# 1: class 1     10
# 2: class 2      4
# 3: class 3      3
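As an aside, `.SD` holds all non-grouping columns within each group, which is why no column names need to be spelled out. If a per-column breakdown is wanted rather than one total per class, the same `.SD` idiom should work with lapply() (a sketch, not benchmarked here):
setDT(df)[, lapply(.SD, function(x) sum(is.na(x))), by = a]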
Or with base R
df2 <- data.frame(a = df[, 1], freq = rowSums(is.na(df[, -1])))
with(df2, tapply(freq, a, sum))
## class 1 class 2 class 3 
##      10       4       3 
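Here, rowSums(is.na(df[, -1])) counts the NAs in each row across the non-grouping columns, and tapply() then sums those per-row counts within each level of a.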
Edit
Here are some benchmarks, as per the OP's comment regarding a big data set with many columns
set.seed(123)
n <- 1e5
df <- data.frame(a = sample(c("class 1", "class 2", "class 3"), n, replace = TRUE),
                 b = sample(c(1:6, NA), n, replace = TRUE),
                 c = sample(c(1:6, NA), n, replace = TRUE),
                 d = sample(c(1:6, NA), n, replace = TRUE),
                 e = sample(c(1:6, NA), n, replace = TRUE),
                 f = sample(c(1:6, NA), n, replace = TRUE),
                 j = sample(c(1:6, NA), n, replace = TRUE),
                 h = sample(c(1:6, NA), n, replace = TRUE),
                 i = sample(c(1:6, NA), n, replace = TRUE),
                 k = sample(c(1:6, NA), n, replace = TRUE),
                 l = sample(c(1:6, NA), n, replace = TRUE),
                 m = sample(c(1:6, NA), n, replace = TRUE),
                 n = sample(c(1:6, NA), n, replace = TRUE))
library(microbenchmark)
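# work on a copy, because setDT() inside davidDT() converts its argument to a data.table by reference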
df2 <- copy(df)
davidDT <- function(x) setDT(x)[, list(SumNAs = sum(is.na(.SD))), by = a]
davidBaseR <- function(x){
  df2 <- data.frame(a = x[, 1], freq = rowSums(is.na(x[, -1])))
  with(df2, tapply(freq, a, sum))
}
RichardBaseR <- function(x){
  cb <- cbind(x[1], isNA = rowSums(is.na(x[-1])))
  aggregate(isNA ~ a, cb, sum)
}
microbenchmark(davidDT(df2),
               davidBaseR(df),
               RichardBaseR(df),
               times = 100L)
# Unit: milliseconds
#              expr        min         lq     median         uq       max neval
#      davidDT(df2)   34.25858   36.91607   39.19706   41.18780  113.0531   100
#    davidBaseR(df)   32.75058   36.46721   43.01609   47.66303  199.7966   100
#  RichardBaseR(df) 1429.29449 1469.32023 1521.38640 1631.51353 2525.2406   100
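So the data.table and rowSums()/tapply() approaches are roughly on par for this data set, while the aggregate() formula interface is about 35-40 times slower.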