You can subset `df` to `col1` and `col2` using `[` and pass the result to `duplicated()`:
# Flag every row whose (col1, col2) pair already appeared in an earlier row,
# then keep only the unflagged rows, i.e. the first occurrence of each pair.
df[!duplicated(df[, c("col1", "col2")]), ]
or, additionally using `paste()` to collapse the two columns into a single key:
# Build one string key per row from col1 and col2 and keep the first row for
# each distinct key.
# An explicit separator is required: with paste()'s default sep = " ", the
# pairs ("a b", "c") and ("a", "b c") would both become "a b c" and one of
# them would be dropped as a false duplicate. "\x1f" (ASCII unit separator)
# is used because it is very unlikely to occur in real data.
df[!duplicated(paste(df$col1, df$col2, sep = "\x1f")), ]
Benchmark:
# Packages under comparison (bench itself is only used via `bench::`).
library(data.table)
library(collapse)
library(dplyr)

# Toy input: 12 rows in which several (col1, col2) pairs repeat.
df <- data.frame(
  col1 = rep(letters[1:3], each = 4),
  col2 = rep(letters[1:4], each = 3),
  col3 = rep(letters[1:2], 6)
)
dt <- as.data.table(df) # data.table copy used by the `dt` entry

# Time each approach to keeping the first row per (col1, col2) pair.
# check = FALSE turns off bench::mark's result-equality check; the entries
# do not all return the same class (e.g. `dplyr2` ends in as_tibble(), `dt`
# works on a data.table).
bench::mark(
  check = FALSE,
  dplyr = df[!(duplicated(dplyr::select(df, col1, col2))), ],
  dplyr2 = (
    df %>%
      distinct(across(c(col1, col2)), .keep_all = TRUE) %>%
      as_tibble()
  ),
  base = df[!(duplicated(df[, c("col1", "col2")])), ],
  base2 = df[!(duplicated(paste(df$col1, df$col2))), ],
  dt = unique(dt, by = c("col1", "col2")),
  funique = funique(df, cols = c("col1", "col2"))
)
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 dplyr 1.47ms 1.53ms 647. 6.44KB 14.8 305 7
#2 dplyr2 3.05ms 3.16ms 314. 1.75MB 15.2 145 7
#3 base 105.72µs 112.72µs 8525. 0B 19.0 4039 9
#4 base2 59.55µs 64.29µs 15093. 0B 16.6 7270 8
#5 dt 42.47µs 50.2µs 20178. 92.66KB 12.5 9699 6
#6 funique 18.48µs 20.25µs 47910. 50.81KB 19.2 9996 4