0

My dataset has 377 rows and I want to compare each row with all the other rows in the dataset and, if two rows are completely equal, print a message.

# Sketch of the intended algorithm: compare row i with row j for every i and j
# in the ranges below and print a message whenever the two rows are equal.
for (i in 1:377)
{
# NOTE(review): j always starts at 2, so for i >= 2 the case i == j occurs and
# a row is compared with itself.
for (j in 2:377)
  {
       # NOTE(review): `row` is not defined in this snippet; presumably row i and
       # row j of the dataset are meant -- confirm.
       if(row[i]== row[j])
        {
       print("Matched")

        }
      else j=j+1  # no effect on the loop: for() takes the next j from 2:377
  }

i=i+1  # likewise no effect: for() takes the next i from 1:377

}

This is a simple sketch of the algorithm I have in mind. I am very new to R and have trouble writing the code, so I hope this sketch helps you in answering. Thank you for your time.

2 Answers

1
DF <- data.frame(
  A = c(1, 3, 2, 1, 4, 3, 3),
  B = c("A", "X", "B", "A", "X", "X", "Y")
)

# example input
# > DF
#   A B
# 1 1 A
# 2 3 X
# 3 2 B
# 4 1 A
# 5 4 X
# 6 3 X
# 7 3 Y

# Find the indexes of every row that has an identical copy elsewhere in DF.
# duplicated() on its own flags only the second and later occurrences of a
# row, so it is combined with a scan from the end (fromLast = TRUE), which
# flags the earlier occurrences as well.
rowIndexesWithDupe <- which(duplicated(DF) | duplicated(DF, fromLast = TRUE))

# print
if (length(rowIndexesWithDupe) > 0) {
  print(paste("Rows:", toString(rowIndexesWithDupe), "have duplicates"))
} else {
  print("No duplicates")
}

# output in this case : 
[1] "Rows: 1, 2, 4, 6 have duplicates"

EDIT :

As per comment, to check "almost duplicates" (e.g. within difference <= 5) you could use a for loop
(N.B.: we're assuming all values are numeric !):

DF <- data.frame(
  A = c(1, 3, 2, 10, 9, 3, 3),
  b = c(10, 12, 13, 16, 11, 9, 8)
)

# example input
# > DF
#    A  b
# 1  1 10
# 2  3 12
# 3  2 13
# 4 10 16
# 5  9 11
# 6  3  9
# 7  3  8

# Two rows count as "almost duplicates" when every column differs by at most
# `toler` in absolute value.
toler <- 5

# One flag per row, allocated up front instead of growing a vector in the loop.
nRows <- nrow(DF)
isDupe <- logical(nRows)

# Visit each unordered pair (i, j) with i < j exactly once. seq_len() yields an
# empty sequence for data frames with fewer than two rows, so the loop body is
# skipped rather than indexing rows that do not exist.
for (i in seq_len(max(nRows - 1L, 0L))) {
  row1 <- DF[i, ]
  for (j in seq(i + 1L, nRows)) {
    row2 <- DF[j, ]
    if (all(abs(row1 - row2) <= toler)) {
      isDupe[c(i, j)] <- TRUE
    }
  }
}

# which() returns the flagged row indexes already sorted and without repeats.
dupes <- which(isDupe)

if (length(dupes) > 0) {
  print(paste("Rows:", toString(dupes), "have duplicates within tolerance =", toler))
} else {
  print(paste("No duplicates within tolerance =", toler))
}

# output in this case :
[1] "Rows: 1, 2, 3, 4, 5, 6, 7 have duplicates within tolerance = 5"
digEmAll
  • 56,430
  • 9
  • 115
  • 140
1

You can use anyDuplicated:

# Sample data: two unnamed columns in which row 4 repeats row 1 and row 6
# repeats row 2.
DF <- data.frame(
  c(1, 3, 2, 1, 4, 3, 3),
  c("A", "X", "B", "A", "X", "X", "Y")
)

# anyDuplicated() returns the index of the first duplicated row, or 0 when
# every row is unique, so a positive value means at least one match exists.
if (anyDuplicated(DF) > 0L) {
  print("Matched")
}
# [1] "Matched"
dww
  • 30,425
  • 5
  • 68
  • 111