Benchmarking a couple of new solutions along with a few that were already posted:
library(Rfast)
library(microbenchmark)
# Benchmark input: 4000 values sampled with replacement from 1..100,
# arranged as a 2000 x 2 matrix (one candidate pair per row).
mymat <- matrix(
  sample(100, size = 4000, replace = TRUE),
  nrow = 2000,
  ncol = 2
)
# Flag the elements (or rows, for a matrix) of `m` that occur exactly once.
#
# `duplicated()` alone only marks the second and later occurrences, so it is
# run in both directions: anything flagged from either end has a twin.
# Returns a logical vector that is TRUE where the element/row has no duplicate.
noDup <- function(m) {
  repeated_forward <- duplicated(m)
  repeated_backward <- duplicated(m, fromLast = TRUE)
  !repeated_forward & !repeated_backward
}
# Keep only the rows of `m` whose combination of values (ignoring the order
# within the row) occurs exactly once.
#
# Each row is sorted with Rfast's rowSort() so that permutations of the same
# values become identical rows, then noDup() flags the rows without a twin.
# The original (unsorted) rows are returned. `drop = FALSE` keeps the result
# a matrix even when only a single row survives.
combounique1 <- function(m) {
  keep <- noDup(rowSort(m))
  m[keep, , drop = FALSE]
}
# Keep only the rows of the two-column matrix `m` whose unordered pair of
# values occurs exactly once, using a symmetric pairing function as the key.
#
# For a row (a, b) the key is
#   a^2 + b^2 + (a + b) + (a + b - 3) * |a - b|
# which, writing x = max(a, b) and y = min(a, b), simplifies to
#   2 * x * (x - 1) + 4 * y.
# It is symmetric in a and b and distinct for distinct pairs of positive
# integers; it is not guaranteed unique for zeros or non-integer values.
# `drop = FALSE` keeps the result a matrix even when only one row survives.
combounique2 <- function(m) {
  msum <- rowsums(m)
  key <- rowsums(m^2) + msum + (msum - 3) * abs(m[, 1] - m[, 2])
  m[noDup(key), , drop = FALSE]
}
# Keep only the rows of `m` whose unordered combination of values occurs
# exactly once, using the symmetric key sum(v + 1 / v) over each row.
#
# The key does not depend on the order of the values within a row. It is
# meant for positive integers: a 0 entry yields 1 / 0 = Inf, so every row
# containing a zero would share the same key. Keys are doubles and are
# compared for exact equality by noDup().
# `drop = FALSE` keeps the result a matrix even when only one row survives.
combounique3 <- function(m) {
  key <- rowsums(m + 1 / m)
  m[noDup(key), , drop = FALSE]
}
# Keep only the rows of the two-column matrix `m` whose unordered pair of
# values occurs exactly once.
#
# similar to Harrison Jones, but correct
combounique4 <- function(m) {
  # Column-swapped copies of the rows whose two values differ. Rows with equal
  # values are skipped: their swap is themselves and would count as a twin.
  swapped <- m[m[, 1] != m[, 2], 2:1, drop = FALSE]
  # Look for twins among the original rows plus the swapped copies, then keep
  # only the flags of the original rows. seq_len() is safe for zero rows,
  # where 1:nrow(m) would yield c(1, 0).
  keep <- noDup(rbind(m, swapped))[seq_len(nrow(m))]
  # drop = FALSE keeps the result a matrix even when only one row survives.
  m[keep, , drop = FALSE]
}
# Keep only the rows of `m` whose combination of values (ignoring the order
# within the row) occurs exactly once.
#
# similar to Ronak Shah, but maintains ordering within rows
combounique5 <- function(m) {
  # apply() returns the sorted rows as columns, hence the transpose.
  sorted_rows <- t(apply(m, 1, sort))
  # Select from the original matrix so the returned rows are unsorted;
  # drop = FALSE keeps the result a matrix even when only one row survives.
  m[noDup(sorted_rows), , drop = FALSE]
}
# Keep only the rows of `m` whose combination of values (ignoring the order
# within the row) occurs exactly once, always returning a matrix.
#
# Each row is reduced to a string key built from its sorted values; ave()
# then gives, for every row, the size of the group sharing its key.
r2evans <- function(m) {
  row_keys <- apply(m, 1, function(z) toString(sort(z)))
  # ave() returns the group sizes as character here because the keys are
  # character, hence the comparison against "1".
  group_sizes <- ave(row_keys, row_keys, FUN = length)
  occurs_once <- group_sizes == "1"
  m[occurs_once, , drop = FALSE]
}
# Time every implementation on the same input matrix. Each expression also
# assigns its result (mymat1 ... mymat6) so that the outputs of all six
# functions can be compared with each other after the benchmark has run.
microbenchmark(mymat1 <- combounique1(mymat),
mymat2 <- combounique2(mymat),
mymat3 <- combounique3(mymat),
mymat4 <- combounique4(mymat),
mymat5 <- combounique5(mymat),
mymat6 <- r2evans(mymat))
expr min lq mean median uq max neval
mymat1 <- combounique1(mymat) 7129.9 7642.30 9236.841 8205.45 9467.70 28363.7 100
mymat2 <- combounique2(mymat) 171.0 197.30 219.341 215.75 225.45 385.5 100
mymat3 <- combounique3(mymat) 144.2 166.95 187.340 182.50 192.30 306.7 100
mymat4 <- combounique4(mymat) 14263.1 15343.90 17938.061 16417.30 19043.30 34884.9 100
mymat5 <- combounique5(mymat) 48230.9 50773.75 57662.463 55041.90 60968.35 193804.2 100
mymat6 <- r2evans(mymat) 66180.3 70835.30 78642.552 77299.85 81992.60 161034.5 100
> all(sapply(list(mymat1, mymat2, mymat3, mymat4, mymat5, mymat6), FUN = identical, mymat1))
[1] TRUE
Note that `combounique2` and `combounique3` are only strictly correct for integer values. The idea is to use a symmetric pairing function to get a unique value for each pair of integers, then use `duplicated` on that.
(see https://math.stackexchange.com/questions/3162166/what-function-symmetric-and-has-unique-solution)