1

I have loaded as many text files as possible from a folder in my global environment

Now I want to merge them into one dataframe

As an example I mention 3 data set and a desire output

df1<- structure(list(V1 = structure(c(9L, 15L, 3L, 2L, 12L, 7L, 8L, 
10L, 4L, 5L, 1L, 6L, 16L, 11L, 14L, 13L, 17L), .Label = c("ASS1", 
"CLCN4", "CXorf56", "DNAL4", "ELAC2", "IPP", "MMP15", "MTMR14", 
"NIPSNAP3B", "NPR1", "POLR2J", "PWP2", "RGS4", "SEC23IP", "TF", 
"TMEM59", "UQCRC1"), class = "factor"), V2 = c(3.771321309, 4.121988898, 
5.555893632, 4.586876086, 6.279490572, 6.004261107, 6.613729673, 
5.185145989, 5.63567329, 5.785365957, 9.018526719, 5.734111507, 
9.809870554, 9.09813781, 5.643864005, 4.540559556, 9.375200415
)), .Names = c("V1", "V2"), class = "data.frame", row.names = c(NA, 
-17L))

second df like below

df2<- structure(list(V1 = structure(c(2L, 3L, 1L), .Label = c("CXorf56", 
"NIPSNAP3B", "TF"), class = "factor"), V2 = c(3.771321309, 4.121988898, 
5.555893632)), .Names = c("V1", "V2"), class = "data.frame", row.names = c(NA, 
-3L))

and the third df like below

df3<- structure(list(V1 = structure(c(6L, 7L, 11L, 17L, 3L, 2L, 14L, 
9L, 10L, 12L, 4L, 5L, 1L, 8L, 18L, 13L, 16L, 15L, 19L), .Label = c("ASS1", 
"CLCN4", "CXorf56", "DNAL4", "ELAC2", "EMX2", "FUS", "IPP", "MMP15", 
"MTMR14", "NIPSNAP3B", "NPR1", "POLR2J", "PWP2", "RGS4", "SEC23IP", 
"TF", "TMEM59", "UQCRC1"), class = "factor"), V2 = c(4.037370833, 
6.933306871, 3.771321309, 4.121988898, 5.555893632, 4.586876086, 
6.279490572, 6.004261107, 6.613729673, 5.185145989, 5.63567329, 
5.785365957, 9.018526719, 5.734111507, 9.809870554, 9.09813781, 
5.643864005, 4.540559556, 9.375200415)), .Names = c("V1", "V2"
), class = "data.frame", row.names = c(NA, -19L))

The three of them have some names in common and some not

I want to make a general name and then put zero values for those data that they dont have those names

a desire output is like below

output<- structure(list(V1 = structure(c(11L, 6L, 7L, 12L, 18L, 3L, 2L, 
15L, 9L, 10L, 13L, 4L, 5L, 1L, 8L, 19L, 14L, 17L, 16L, 20L), .Label = c("ASS1", 
"CLCN4", "CXorf56", "DNAL4", "ELAC2", "EMX2", "FUS", "IPP", "MMP15", 
"MTMR14", "names", "NIPSNAP3B", "NPR1", "POLR2J", "PWP2", "RGS4", 
"SEC23IP", "TF", "TMEM59", "UQCRC1"), class = "factor"), V2 = structure(c(19L, 
1L, 1L, 2L, 3L, 7L, 5L, 13L, 12L, 14L, 6L, 8L, 11L, 15L, 10L, 
18L, 16L, 9L, 4L, 17L), .Label = c("0", "3.771321309", "4.121988898", 
"4.540559556", "4.586876086", "5.185145989", "5.555893632", "5.63567329", 
"5.643864005", "5.734111507", "5.785365957", "6.004261107", "6.279490572", 
"6.613729673", "9.018526719", "9.09813781", "9.375200415", "9.809870554", 
"df1"), class = "factor"), V3 = structure(c(5L, 1L, 1L, 2L, 3L, 
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", 
"3.771321309", "4.121988898", "5.555893632", "df2"), class = "factor"), 
    V4 = structure(c(20L, 2L, 15L, 1L, 3L, 7L, 5L, 13L, 12L, 
    14L, 6L, 8L, 11L, 16L, 10L, 19L, 17L, 9L, 4L, 18L), .Label = c("3.771321309", 
    "4.037370833", "4.121988898", "4.540559556", "4.586876086", 
    "5.185145989", "5.555893632", "5.63567329", "5.643864005", 
    "5.734111507", "5.785365957", "6.004261107", "6.279490572", 
    "6.613729673", "6.933306871", "9.018526719", "9.09813781", 
    "9.375200415", "9.809870554", "df3"), class = "factor")), .Names = c("V1", 
"V2", "V3", "V4"), class = "data.frame", row.names = c(NA, -20L
))

1 Answers1

0

Here's how I would approach this. Hope this helps. rbindlist() is as good as it gets!

I'm sure someone can maximize rename_list(), but you get the idea below.

library(tidyr)
library(data.table)

rename_list <- function(list){
  for(i in 1:length(list)){
  list[[i]] <- list[[i]]%>%
               magrittr::set_colnames(c("V1", paste("df", i, sep = "")))
  }
  return(list)
}


mget(ls(pattern = "df"))%>%
rename_list()%>%
rbindlist(idcol = TRUE)%>%
spread(.id, df1)


           V1      df1      df2      df3
 1:      ASS1 9.018527       NA 9.018527
 2:     CLCN4 4.586876       NA 4.586876
 3:   CXorf56 5.555894 5.555894 5.555894
 4:     DNAL4 5.635673       NA 5.635673
 5:     ELAC2 5.785366       NA 5.785366
 6:       IPP 5.734112       NA 5.734112
 7:     MMP15 6.004261       NA 6.004261
 8:    MTMR14 6.613730       NA 6.613730
 9: NIPSNAP3B 3.771321 3.771321 3.771321
10:      NPR1 5.185146       NA 5.185146
11:    POLR2J 9.098138       NA 9.098138
12:      PWP2 6.279491       NA 6.279491
13:      RGS4 4.540560       NA 4.540560
14:   SEC23IP 5.643864       NA 5.643864
15:        TF 4.121989 4.121989 4.121989
16:    TMEM59 9.809871       NA 9.809871
17:    UQCRC1 9.375200       NA 9.375200
18:      EMX2       NA       NA 4.037371
19:       FUS       NA       NA 6.933307
InfiniteFlash
  • 1,038
  • 1
  • 10
  • 22
  • can you please explain what you are doing? because when the files have different names this code does not work. I am trying to figure out how to change that –  Dec 27 '17 at 03:18
  • Well `mget(ls(pattern = "df"))` grabs all of the objects with "df" in their names and puts them in a list. – InfiniteFlash Dec 27 '17 at 03:19
  • so it means if they have df in their name, they will be gathered . what does this do spread(.id,df1) –  Dec 27 '17 at 03:21
  • spread reformats the the merged `data.frame()` into a format that what you want. Try to evaluate your code and data (specified in the OP) at each step. – InfiniteFlash Dec 27 '17 at 03:22
  • do you think this will work if we have two dataframe with several columns but we merge by only one ? –  Dec 27 '17 at 03:35
  • Well the renaming the columns part, no. You'll have to figure out how to do all of this. Now that im looking at my code, im not sure rename_list is even necessary. – InfiniteFlash Dec 27 '17 at 04:22
  • The same code should work, but id recommend you toy around. – InfiniteFlash Dec 27 '17 at 04:22