0

I have 1 data set which is the base & there are 7 other datasets for 7 different years & for 3 different regions .These datasets include amount,region & year which is common to the base data.

However, I need to merge the 7 data sets 1 by one to the base dataset.How do I achieve that?

Base dataset:

company_region  raised_amount_usd   Year
 SF Bay Area    1000050 2011
 SF Bay Area    2520000 2011
 SF Bay Area    15000   2010
 Singapore  615000  2011

For Year 2007:

 raised_amount_usd  z   e   Year company_region 
1.00E+06    5   0          2007  Singapore
8.00E+06    6   1          2007  Singapore

50000 3 0 2007 Singapore 35000 3 0 2007 Singapore

& similarly I have data for the other years 2008-2012.I need columns z & e in my base data set.Instead of writing 7 merge statements how can it be done through a function?

Would be great if someone can help out.Thanks in advance!!

user6016731
  • 382
  • 5
  • 18

1 Answers1

0

If you want to keep the columns z and e, bind_rows() from the dplyr package seems to be the answer (see also here Combine two data frames by rows (rbind) when they have different sets of columns)

# Create example
a <- c(rep("SF Bay Area",3),"Singapore")
b <- c(1000050,2520000,15000,615000)
c <- c(2011,2010,2011,2011)
base <- cbind.data.frame(a,b,c,stringsAsFactors =F)
colnames(base) <- c("company_region","raised_amount_usd","Year")


a <- c(rep("Germany",4))
b <- c(100055,2524400,150020,68880)
c <- c(2007,2007,2007,2007)
e <- c(1,1,1,1)
z <- c(1,1,1,1)
data_germany <- cbind.data.frame(a,b,c,e,z,stringsAsFactors =F)
colnames(data_germany) <- c("company_region","raised_amount_usd","Year","e","z")

a <- c(rep("Italy",4))
b <- c(100055,2524400,150020,68880)
c <- c(2007,2007,2007,2007)
e <- c(1,1,1,1)
z <- c(1,1,1,1)
data_italy <- cbind.data.frame(a,b,c,e,z,stringsAsFactors =F)
colnames(data_italy) <- c("company_region","raised_amount_usd","Year","e","z")

# bin german and italian data at once with dplyr
library(dplyr)
base %>% 
  bind_rows(data_germany) %>% 
  bind_rows(data_italy) -> base

If you don't want to keep z and e, you can do something like this:

# Function to extent base dataframe
# base_df = base dataframe to extent
# add_df = dataframe that should be added to the base dataframe
fun_extent_data <- function(base_df,add_df) {

  library(dplyr)
  base_df <- base_df
  add_df <- add_df

  # Choose all necessary columns
  add_df %>%
    select(company_region,raised_amount_usd,Year) -> add_df_light

  # Bind the data to the base dataframe
  rbind.data.frame(base_df,add_df_light,stringsAsFactors = FALSE) -> base_df

  return(base_df)
}

# Use function 
fun_extent_data(base,data_germany) -> base

# Use function for german and italian data at once with dplyr
library(dplyr)
base %>%
  fun_extent_data(.,data_germany) %>%
  fun_extent_data(.,data_italy) -> base
Community
  • 1
  • 1
PhiSeu
  • 301
  • 2
  • 9