1

Dataset1:

id1  id2  abc  n  
  1  111  yes  2    
  2  121  no   1   
  3  122  yes  2  
  4  224  no   2    
  5  441  no   3   
  6  665  yes  1     

Dataset2:

id1  id2  age gen  
  1  111  45  m   
  1  111  46  f  
  2    1  52  f  
121  122  41  f    
121  122  44  m  
  4  224  54  f  
  4  221  56  m  
  5  441  44  m  
  5  441  45  f  
  5  441  58  f    
  6  665  54  f    

I have two data sets that are linked by id1 and id2. How can I identify the rows in each data set that fail to link to the other?

1 Answer

1

We can use anti_join from the dplyr package to keep only the rows that have no match in the other data set.

library(dplyr)

# Rows of Dataset1 whose (id1, id2) pair has no matching row in Dataset2.
Dataset1_anti <- anti_join(Dataset1, Dataset2, by = c("id1", "id2"))

Dataset1_anti
#   id1 id2 abc n
# 1   2 121  no 1
# 2   3 122 yes 2

# Rows of Dataset2 whose (id1, id2) pair has no matching row in Dataset1.
Dataset2_anti <- anti_join(Dataset2, Dataset1, by = c("id1", "id2"))

Dataset2_anti
#   id1 id2 age gen
# 1   2   1  52   f
# 2 121 122  41   f
# 3 121 122  44   m
# 4   4 221  56   m

DATA

# Reproducible copy of Dataset1 from the question, parsed from inline
# whitespace-separated text; the first line supplies the column names and
# character columns are kept as character rather than converted to factors.
Dataset1 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "id1  id2  abc  n  
  1  111  yes  2    
                       2  121  no   1   
                       3  122  yes  2  
                       4  224  no   2    
                       5  441  no   3   
                       6  665  yes  1  "
)

# Reproducible copy of Dataset2 from the question, parsed from inline
# whitespace-separated text; the first line supplies the column names and
# character columns are kept as character rather than converted to factors.
Dataset2 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "id1  id2  age gen  
  1  111  45  m   
  1  111  46  f  
  2    1  52  f  
121  122  41  f    
121  122  44  m  
  4  224  54  f  
  4  221  56  m  
  5  441  44  m  
  5  441  45  f  
  5  441  58  f    
  6  665  54  f "
)
www
  • 38,575
  • 12
  • 48
  • 84