0

I have two df and I need to merge them.

df1 looks like this:

COUNTRY   YEAR   TRADE   
Spain     2016   276   
Germany   2016   323      
France    2016   392
Spain     2017   456   
Germany   2017   564      
France    2017   359
Spain     2015   767   
Germany   2015   868      
France    2015   969

df2 looks like this:

COUNTRY   GDP2016   GDP2017 GDP2015
Spain      1111       999    444
Germany    2222       888    555  
France     3333       777    666

With two GDP I could use:

df3 <- merge(df1,df2, by = "COUNTRY")

df3 <- df3 %>% mutate(GDP = ifelse(YEAR == 2016, GDP2016, GDP2017))
df3 <- subset(df3, select = -c(GDP2016, GDP2017)

Yet, with 3 GDP I have to use something different. What I want to get is:

COUNTRY   YEAR   TRADE    GDP 
Spain     2016   276      1111
Germany   2016   323      2222   
France    2016   392      3333
Spain     2017   456      999
Germany   2017   564      888      
France    2017   359      777
Spain     2015   767      444
Germany   2015   868      555      
France    2015   969      666

I would appreciate any help!

Leonardo
  • 5
  • 3
  • The last time I tried something like I'm suggesting it was downvoted, but maybe introduce a new ID column in both data sets? – NelsonGon Mar 19 '19 at 12:25
  • You can find your answer [here](https://stackoverflow.com/questions/1299871/how-to-join-merge-data-frames-inner-outer-left-right). – Earl Mascetti Mar 19 '19 at 12:26

4 Answers4

0

You have to melt df2 to put it in the same format as df1. Then I create a new column YEAR with gsub by removing the "GDP" part of the string and keeping only the year.

df2_melt <- melt(df2, id.vars="COUNTRY")
df2_melt$YEAR <- gsub(pattern = "GDP",replacement = "",x = df2_melt$variable)
colnames(df2_melt)[colnames(df2_melt)=="value"] <- "GDP"

df3 <- merge(df1,df2_melt, by = c("COUNTRY","YEAR"))

  COUNTRY YEAR TRADE variable  GDP
1  France 2016   392  GDP2016 3333
2  France 2017   359  GDP2017  777
3 Germany 2016   323  GDP2016 2222
4 Germany 2017   564  GDP2017  888
5   Spain 2016   276  GDP2016 1111
6   Spain 2017   456  GDP2017  999

DATA

df1 <- read.table(text="COUNTRY   YEAR   TRADE   
Spain     2016   276   
Germany   2016   323      
France    2016   392
Spain     2017   456   
Germany   2017   564      
France    2017   359
Spain     2015   767   
Germany   2015   868      
France    2015   969",header=TRUE, stringsAsFactors=FALSE)

df2 <- read.table(text="COUNTRY   GDP2016   GDP2017 GDP2018
Spain      1111       999    444
Germany    2222       888    555  
France     3333       777    6669",header=TRUE, stringsAsFactors=FALSE)
Pierre Lapointe
  • 16,017
  • 2
  • 43
  • 56
0

You could do:

library(tidyverse)

df1 %>%
  left_join(df2 %>%
              gather(YEAR, GDP, -COUNTRY) %>%
              mutate(YEAR = as.integer(sub("GDP", "", YEAR))),
            by = c("COUNTRY", "YEAR"))
arg0naut91
  • 14,574
  • 2
  • 17
  • 38
0

The issue is that the df2 is not in a structure which makes it easy to join, so I would change the structure using tidyr:

library(dplyr)
library(tidyr)

df3 <-
  df1 %>% 
  left_join(df2 %>% 
               gather(YEAR, GDP, -COUNTRY) %>% 
               mutate(YEAR = as.numeric(substr(YEAR, 4, 7))), 
             by = c("COUNTRY", "YEAR"))

Please note that this does not give your expected answer, because the years differ. In df1 there is a year of 2015, but in df2 there is data for GDB2018.

data used:

df1 <- tibble::tribble(
   ~COUNTRY, ~YEAR, ~TRADE,
    "Spain",  2016,    276,
  "Germany",  2016,    323,
   "France",  2016,    392,
    "Spain",  2017,    456,
  "Germany",  2017,    564,
   "France",  2017,    359,
    "Spain",  2015,    767,
  "Germany",  2015,    868,
   "France",  2015,    969
  )

df2 <- tibble::tribble(
   ~COUNTRY, ~GDP2016, ~GDP2017, ~GDP2018,
    "Spain",     1111,      999,      444,
  "Germany",     2222,      888,      555,
   "France",     3333,      777,      666
  )
Kerry Jackson
  • 1,821
  • 12
  • 20
0

data.table

sample data

library( data.table )
df1 <- fread("COUNTRY   YEAR   TRADE   
Spain     2016   276   
             Germany   2016   323      
             France    2016   392
             Spain     2017   456   
             Germany   2017   564      
             France    2017   359
             Spain     2015   767   
             Germany   2015   868      
             France    2015   969")

df2 <- fread("COUNTRY   GDP2016   GDP2017 GDP2015
Spain      1111       999    444
             Germany    2222       888    555  
             France     3333       777    666")

code

#first melt and modify df2
df3 <- melt(df2, id.vars = "COUNTRY", variable.name = "YEAR")[, YEAR := as.numeric(gsub("[^0-9]", "", YEAR))]
#then join
df1[ df3, GDP := i.value, on = .(COUNTRY, YEAR) ][]

#or use as oneliner
df1[ melt(df2, id.vars = "COUNTRY", variable.name = "YEAR")[, YEAR := as.numeric(gsub("[^0-9]", "", YEAR))], GDP := i.value, on = .(COUNTRY, YEAR) ][]

output

#    COUNTRY YEAR TRADE  GDP
# 1:   Spain 2016   276 1111
# 2: Germany 2016   323 2222
# 3:  France 2016   392 3333
# 4:   Spain 2017   456  999
# 5: Germany 2017   564  888
# 6:  France 2017   359  777
# 7:   Spain 2015   767  444
# 8: Germany 2015   868  555
# 9:  France 2015   969  666
Wimpel
  • 26,031
  • 1
  • 20
  • 37