0

I am validating two data frames if they are consistent, its working on small dataframes perfectly but when records of data frame increases then it shows error


library(tidyverse)
df1 <- data.frame(MAN=c(6,6,4,6,8,6,8,4,4,6,6,8,8),MANi=c("OD","NY","CA","CA","OD","CA","OD","NY","OL","NY","OD","CA","OD"),
                  nune=c("akas","mani","juna","mau","nuh","kil","kman","nuha","huna","kman","nuha","huna","mani"),
                  klay=c(1,2,2,1,1,2,1,2,1,2,1,1,2),emial=c("dd","xyz","abc","dd","xyz","abc","dd","xyz","abc","dd","xyz","abc","dd"),Pass=c("Low","High","Low","Low","High","Low","High","High","Low","High","High","High","Low"),fri=c("KKK","USA","IND","SRI","PAK","CHI","JYP","TGA","KKK","USA","IND","SRI","PAK"),
                  mkl=c("m","f","m","m","f","m","m","f","m","m","f","m","m"),kin=c("Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Rec","Sent","Rec"),munc=c("Car","Bus","Truk","Cyl","Bus","Car","Bus","Bus","Bus","Car","Car","Cyl","Car"),
                  lone=c("Sr","jun","sr","jun","man","man","jr","Sr","jun","sr","jun","man","man"),wond=c("tko","kent","bho","kilt","kent","bho","kent","bho","bho","kilt","kent","bho","kilt"))

df2 <- data.frame(MAN=c(6,6,4,6,8,6,8,4,4,6,6,8,8,8,6),MANi=c("OD","NY","CA","CA","OD","CA","OD","NY","OL","ny","OD","CA","OD","NY","OL"),
                  nune=c("akas","mani","juna","mau","nuh","kil","kman","nuha","huna","kman","nuha","huna","mani","juna","mau"),
                  klay=c(1,2,2,1,1,2,1,2,1,2,1,1,2,2,1),emial=c("dd","xyz","ABC","dd","xyz","ABC","dd","xyz","ABC","dd","xyz","ABC","dd","xyz","ABC"),Pass=c("Low","High","Low","Low","High","Low","High","High","Low","High","High","High","Low","High","High"),fri=c("KKK","USA","IND","SRI","PAK","CHI","JYP","TGA","KKK","USA","IND","SRI","PAK","CHI","JYP"),
                  mkl=c("male","female","male","male","female","male","male","female","male","male","female","male","male","female","male"),kin=c("Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Rec","Sent","Rec","Sent","Rec"),munc=c("Car","Bus","Truk","Cyl","Bus","Car","Bus","Bus","Bus","Car","Car","Cyl","Car","Bus","Bus"),
                  lone=c("Sr","jun","sr","jun","man","man","jr","Sr","jun","sr","jun","man","man","jr","man"),wond=c("tko","kent","bho","kilt","kent","bho","kent","bho","bho","kilt","kent","bho","kilt","kent","bho"))


potro
  • 119
  • 5
  • Please define "consistent". Do you mean simply every column appears in both data frames and, for each column, every value that appears in one data frame also appears in the other? Or something else? Perhaps that every value in every column and every row has to be the same in both data frames? – Limey May 20 '22 at 13:59
  • Does this answer your question? [R memory management / cannot allocate vector of size n Mb](https://stackoverflow.com/questions/5171593/r-memory-management-cannot-allocate-vector-of-size-n-mb) – Basti May 20 '22 at 13:59
  • @Limey, actually i am validating all the columns names and their options in first database to second database. for for example in first dataframe column gender have "Male" and "female" and in second database have gender "M" and "F" like this. so this can be happen for many columns that's. – potro May 20 '22 at 14:19
  • for instance i got a database from team and after sometime i got second database from team so i want to check what are the things changed in second database as compared to first database or vice versa. – potro May 20 '22 at 14:21
  • So, given your test input, what is your expected output? – Limey May 20 '22 at 14:26
  • @Limey I have updated the required output – potro May 21 '22 at 10:39

1 Answers1

0

Worth considering waldo::compare?

df1 <- data.frame(MAN=c(6,6,4,6,8,6,8,4,4,6,6,8,8),MANi=c("OD","NY","CA","CA","OD","CA","OD","NY","OL","NY","OD","CA","OD"),
                  nune=c("akas","mani","juna","mau","nuh","kil","kman","nuha","huna","kman","nuha","huna","mani"),
                  klay=c(1,2,2,1,1,2,1,2,1,2,1,1,2),emial=c("dd","xyz","abc","dd","xyz","abc","dd","xyz","abc","dd","xyz","abc","dd"),Pass=c("Low","High","Low","Low","High","Low","High","High","Low","High","High","High","Low"),fri=c("KKK","USA","IND","SRI","PAK","CHI","JYP","TGA","KKK","USA","IND","SRI","PAK"),
                  mkl=c("m","f","m","m","f","m","m","f","m","m","f","m","m"),kin=c("Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Rec","Sent","Rec"),munc=c("Car","Bus","Truk","Cyl","Bus","Car","Bus","Bus","Bus","Car","Car","Cyl","Car"),
                  lone=c("Sr","jun","sr","jun","man","man","jr","Sr","jun","sr","jun","man","man"),wond=c("tko","kent","bho","kilt","kent","bho","kent","bho","bho","kilt","kent","bho","kilt"))

df2 <- data.frame(MAN=c(6,6,4,6,8,6,8,4,4,6,6,8,8,8,6),MANi=c("OD","NY","CA","CA","OD","CA","OD","NY","OL","ny","OD","CA","OD","NY","OL"),
                  nune=c("akas","mani","juna","mau","nuh","kil","kman","nuha","huna","kman","nuha","huna","mani","juna","mau"),
                  klay=c(1,2,2,1,1,2,1,2,1,2,1,1,2,2,1),emial=c("dd","xyz","ABC","dd","xyz","ABC","dd","xyz","ABC","dd","xyz","ABC","dd","xyz","ABC"),Pass=c("Low","High","Low","Low","High","Low","High","High","Low","High","High","High","Low","High","High"),fri=c("KKK","USA","IND","SRI","PAK","CHI","JYP","TGA","KKK","USA","IND","SRI","PAK","CHI","JYP"),
                  mkl=c("male","female","male","male","female","male","male","female","male","male","female","male","male","female","male"),kin=c("Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Sent","Rec","Rec","Sent","Rec","Sent","Rec"),munc=c("Car","Bus","Truk","Cyl","Bus","Car","Bus","Bus","Bus","Car","Car","Cyl","Car","Bus","Bus"),
                  lone=c("Sr","jun","sr","jun","man","man","jr","Sr","jun","sr","jun","man","man","jr","man"),wond=c("tko","kent","bho","kilt","kent","bho","kent","bho","bho","kilt","kent","bho","kilt","kent","bho"))

waldo::compare(df1, df2)
#> `attr(old, 'row.names')[11:13]`: 11 12 13      
#> `attr(new, 'row.names')[11:15]`: 11 12 13 14 15
#> 
#> old vs new
#>             MAN MANi nune klay emial Pass fri    mkl  kin munc lone wond
#> - old[1, ]    6   OD akas    1   dd  Low  KKK m      Sent Car   Sr  tko 
#> + new[1, ]    6   OD akas    1   dd  Low  KKK male   Sent Car   Sr  tko 
#> - old[2, ]    6   NY mani    2   xyz High USA f      Rec  Bus   jun kent
#> + new[2, ]    6   NY mani    2   xyz High USA female Rec  Bus   jun kent
#> - old[3, ]    4   CA juna    2   abc Low  IND m      Sent Truk  sr  bho 
#> + new[3, ]    4   CA juna    2   ABC Low  IND male   Sent Truk  sr  bho 
#> - old[4, ]    6   CA mau     1   dd  Low  SRI m      Rec  Cyl   jun kilt
#> + new[4, ]    6   CA mau     1   dd  Low  SRI male   Rec  Cyl   jun kilt
#> - old[5, ]    8   OD nuh     1   xyz High PAK f      Sent Bus   man kent
#> + new[5, ]    8   OD nuh     1   xyz High PAK female Sent Bus   man kent
#> - old[6, ]    6   CA kil     2   abc Low  CHI m      Rec  Car   man bho 
#> + new[6, ]    6   CA kil     2   ABC Low  CHI male   Rec  Car   man bho 
#> - old[7, ]    8   OD kman    1   dd  High JYP m      Sent Bus   jr  kent
#> + new[7, ]    8   OD kman    1   dd  High JYP male   Sent Bus   jr  kent
#> - old[8, ]    4   NY nuha    2   xyz High TGA f      Rec  Bus   Sr  bho 
#> + new[8, ]    4   NY nuha    2   xyz High TGA female Rec  Bus   Sr  bho 
#> - old[9, ]    4   OL huna    1   abc Low  KKK m      Sent Bus   jun bho 
#> + new[9, ]    4   OL huna    1   ABC Low  KKK male   Sent Bus   jun bho 
#> - old[10, ]   6   NY kman    2   dd  High USA m      Rec  Car   sr  kilt
#> + new[10, ]   6   ny kman    2   dd  High USA male   Rec  Car   sr  kilt
#> and 5 more ...
#> 
#> `old$MAN[11:13]`: 6 8 8    
#> `new$MAN[11:15]`: 6 8 8 8 6
#> 
#> `old$MANi[10:13]`: "NY"                          "OD" "CA" "OD"
#> `new$MANi[7:15]`:  "OD" "NY" "OL" "ny" "OD" "CA" "OD" "NY" "OL"
#> 
#> `old$nune[11:13]`: "nuha" "huna" "mani"             
#> `new$nune[11:15]`: "nuha" "huna" "mani" "juna" "mau"
#> 
#> `old$klay[11:13]`: 1 1 2    
#> `new$klay[11:15]`: 1 1 2 2 1
#> 
#>      old$emial | new$emial               
#>  [2] "xyz"     - "dd"      [1]           
#>  [3] "abc"     - "xyz"     [2]           
#>  [4] "dd"      - "ABC"     [3]           
#>  [5] "xyz"     - "dd"      [4]           
#>  [6] "abc"     - "xyz"     [5]           
#>  [7] "dd"      - "ABC"     [6]           
#>  [8] "xyz"     - "dd"      [7]           
#>  [9] "abc"     - "xyz"     [8]           
#> [10] "dd"      - "ABC"     [9]           
#> [11] "xyz"     - "dd"      [10]          
#>  ... ...         ...       and 5 more ...
#> 
#> `old$Pass[11:13]`: "High" "High" "Low"              
#> `new$Pass[11:15]`: "High" "High" "Low" "High" "High"
#> 
#> `old$fri[11:13]`: "IND" "SRI" "PAK"            
#> `new$fri[11:15]`: "IND" "SRI" "PAK" "CHI" "JYP"
#> 
#>      old$mkl | new$mkl                
#>  [1] "m"     - "male"   [1]           
#>  [2] "f"     - "female" [2]           
#>  [3] "m"     - "male"   [3]           
#>  [4] "m"     - "male"   [4]           
#>  [5] "f"     - "female" [5]           
#>  [6] "m"     - "male"   [6]           
#>  [7] "m"     - "male"   [7]           
#>  [8] "f"     - "female" [8]           
#>  [9] "m"     - "male"   [9]           
#> [10] "m"     - "male"   [10]          
#>  ... ...       ...      and 5 more ...
#> 
#> And 4 more differences ...

Created on 2022-05-21 by the reprex package (v2.0.1)

Or the daff package for highlighted sortable / filterable differences:

library(daff)

diffs <- diff_data(df1, df2)
  
render_diff(diffs)
Carl
  • 4,232
  • 2
  • 12
  • 24
  • actually i need output in my required format so that i can validate with filtering the database – potro May 21 '22 at 17:09