0

I'm attempting to merge two datasets based on borrower_country and year below. The first dataset is a part of a larger set with multiple countries and I'm trying to fill in the NA values of the first set test with the values from the set gdp_infl. For some reason it just won't merge and the columns GDP_per_capita_USD, GDP_per_capita_growth and inflation stay as NAs.

Also as an additional detail, this is a part of a larger merge where I combined the gdp_infl data of multiple countries with my main dataset. All other countries worked without a problem but for some reason Japan doesn't. I'll insert the code I've tried to the bottom.

structure(list(borrower_country = c("Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan"), year = c(1999L, 
1999L, 1999L, 1999L, 1999L, 1999L, 2000L, 1999L, 1999L, 1999L, 
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 
1999L, 1999L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2001L, 
2001L, 2001L, 2001L, 2001L), GDP_per_capita_USD = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), GDP_per_capita_growth = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), inflation = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
)), row.names = c(NA, 50L), class = "data.frame")

structure(list(borrower_country = c("Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan", 
"Japan"), year = 1982:2020, GDP_per_capita_growth = c(2.6, 2.81, 
3.83, 4.58, 2.78, 4.23, 6.34, 4.44, 4.55, 3.01, 0.47, -0.84, 
0.71, 2.5, 2.87, 0.84, -1.4, -0.43, 2.61, 0.16, -0.11, 1.31, 
2.17, 1.65, 1.36, 1.54, -1.14, -5.4, 4.17, 0.07, 1.66, 2.15, 
0.51, 1.33, 0.64, 2.34, 0.53, 0.48, NA), GDP_per_capita_USD = c(9575.61, 
10421.21, 10978.92, 11576.69, 17113.26, 20748.99, 25059.01, 24822.78, 
25371.46, 28915.01, 31414.98, 35681.96, 39200.49, 43428.94, 38436.93, 
35021.72, 31902.77, 36026.56, 38532.04, 33846.47, 32289.35, 34808.39, 
37688.72, 37217.65, 35433.99, 35275.23, 39339.3, 40855.18, 44507.68, 
48168, 48603.48, 40454.45, 38109.41, 34524.47, 38761.82, 38386.51, 
39159.42, 40113.06, NA), inflation = c(2.74, 1.9, 2.26, 2.03, 
0.6, 0.13, 0.68, 2.27, 3.08, 3.25, 1.76, 1.24, 0.7, -0.13, 0.14, 
1.75, 0.66, -0.34, -0.68, -0.74, -0.92, -0.26, -0.01, -0.28, 
0.25, 0.06, 1.38, -1.35, -0.72, -0.27, -0.05, 0.35, 2.76, 0.79, 
-0.12, 0.47, 0.98, 0.48, -0.02)), row.names = c(NA, -39L), class = "data.frame")

The code

test <- df
  test$GDP_per_capita_growth[test$borrower_country == "Japan"] <- NA
  test$GDP_per_capita_USD[test$borrower_country == "Japan"] <- NA
  test$inflation[test$borrower_country == "Japan"] <- NA
  
  test <- test %>%
    filter(borrower_country == "Japan")%>%
    select(borrower_country,year,GDP_per_capita_USD,GDP_per_capita_growth,inflation)

  temp <- merge(test,gdp_infl, by = c("borrower_country", "year","GDP_per_capita_USD","GDP_per_capita_growth","inflation"), all.x = TRUE)

I can't seem to solve this and any help would be much appreciated.

Moz
  • 27
  • 5

1 Answers1

1

You may try natural_join from rqdatatable package

natural_join(df1, df2, by = c("borrower_country", "year"))

    borrower_country year GDP_per_capita_growth GDP_per_capita_USD inflation
 1:            Japan 1999                 -0.43           36026.56     -0.34
 2:            Japan 1999                 -0.43           36026.56     -0.34
 3:            Japan 1999                 -0.43           36026.56     -0.34
 4:            Japan 1999                 -0.43           36026.56     -0.34
 5:            Japan 1999                 -0.43           36026.56     -0.34
 6:            Japan 1999                 -0.43           36026.56     -0.34
 7:            Japan 1999                 -0.43           36026.56     -0.34
 8:            Japan 1999                 -0.43           36026.56     -0.34
 9:            Japan 1999                 -0.43           36026.56     -0.34
Park
  • 14,771
  • 6
  • 10
  • 29