0

I have to compare two tables and find the missing records between two tables in two different databases. (considering the ideal situation that records should be same in tableA and tableB). tableA_count and tableB_count are result of Select count(*). First I tried the way mentioned below and got results as expected.

if (tableA_count > tableB_count) {

    query1 <- paste0("SELECT * FROM   tableA (NOLOCK)")
    results1 <- sqlQuery(cn_ods, query)

    query2 <- paste0("SELECT * FROM tableB (NOLOCK)")
    results2 <- sqlQuery(cn_ad, compare16_2_join)
    test_anti_join <- anti_join(results1, results2, by=c("CLIENT_ID"="ClientID")
    rm(results1)
    rm(results2)
    View(head(test_anti_join))
  } else if (tableA_count < tableB_count) {


    query1 <- paste0("SELECT * FROM   tableA (NOLOCK)")
    results1 <- sqlQuery(cn_ods, query)

    query2 <- paste0("SELECT * FROM tableB (NOLOCK)")
    results2 <- sqlQuery(cn_ad, compare16_2_join)
    test_anti_join <- anti_join(results2, results1, by=c("ClientID"= "CLIENT_ID")
    rm(results1)
    rm(results2)
    View(head(test_anti_join))
  }

But, Keeping the spirit high and adding some value to my code, I tried to wrote a function for it . after many attempt, this was my most accurate code, but still I couldn't get any value returned from it.

Fun_Display_Missing_Recrods <- function(ods_count, app_count,ods_t, app_t, col_a, col_b) {

    if (ods_count > app_count) {

      query1 <- paste0("SELECT * FROM ", ods_t ," (NOLOCK)")
      results1 <- sqlQuery(cn_ods, query1)

      query2 <- paste0("SELECT * FROM ",app_t,"  (NOLOCK)")
      results2 <- sqlQuery(cn_ad, query2)

      test_anti_join1 <- anti_join(results1, results2, by= c(col_a = col_b))
      rm(query1)
      rm(query2)
      rm(results1)
      rm(results2)
      # Records not in tables ##
      View(test_anti_join1)

    } else if (ods_count < app_count)  {

      query1 <- paste0("SELECT * FROM ",ods_t," (NOLOCK)")
      results1 <- sqlQuery(cn_ods, query1)

      query2 <- paste0("SELECT * FROM ",app_t," (NOLOCK)")
      results2 <- sqlQuery(cn_ad, query2)


      test_anti_join1 <- anti_join(results2, results1, by= c(col_b = col_a))
      rm(query1)
      rm(query2)
      rm(results1)
      rm(results2)
      # Records not in tables ##
      View(test_anti_join1)
    }}

Any Guidance or any other better approach will be highly appreciated.

  • 1
    Cannot debug further as your example is not [reproducible](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example), but one problem is the `by=c(col_a = col_b)`: this has the heading "col_a" instead of the *value* of the variable `col_a`. Try `x <- col_b; names(x) <- col_a` somewhere and then use `by=x`. (or `by=setNames(col_b, col_a)` if you do not want to create the variable `x`) – mathematical.coffee Nov 01 '16 at 01:18
  • I tried this but no luck. – Ravindra Kumar Nov 01 '16 at 01:46
  • Error in paste0("SELECT * FROM ", ods_t, " (NOLOCK)") : object 'test_data' not found this is the error that is returned. – Ravindra Kumar Nov 01 '16 at 01:47

0 Answers0