0
combine_cols<- function(primary,secondary,linker,column) {
require(data.table) 
a<-data.table("Sample"=primary[,linker], primary[,column])
b<-data.table("Sample"=secondary[,linker], secondary[,column])

c <- merge(a, b, by = "Sample", all=TRUE)
c[,Status := ifelse(!is.na(c[,paste0(column,".x")]), paste0(column,".x"), 
paste0(column,".y"))]
c[,`:=` (paste0(column,".x")=NULL, paste0(column,".y")= NULL)]

return(c)
}
mydata1<-data.frame("Sample"=c("100","101","102","103"),"Status"=c("Y","","","partial"))
mydata2<-data.frame("Sample"=c("100","101","102","103","106"),"Status"=c("NA","Y","","","Y"))
print((combine_cols(mydata1,mydata2,"Sample",c("Status"))))

I'm trying to create a function to merge columns of split data. The ifelse line isn't working because the paste0(column,".x") is recognized as a character and not a column name. How can I ensure that c[,paste0(column,".x")] reflects c$c[,paste0(column,".x")] ? Better yet, how can I modify this line to handle a list of column names?

Sotos
  • 51,121
  • 6
  • 32
  • 66
sm002
  • 101
  • 1
  • 10

1 Answers1

0

Just work with standard names and rename after, it'll be much more readable too.

a<-data.table("Sample"=primary[,linker], "tempname" =primary[,column])        # added tempname
b<-data.table("Sample"=secondary[,linker], "tempname" =secondary[,column])    # added tempname
c <- merge(a, b, by = "Sample", all=TRUE)
c[,Status := ifelse(!is.na(tempname.x),tempname.x,tempname.y)]
setnames(c,paste0("tempname",c(".x",".y")),paste0(column,c(".x",".y")))

with your example:

   Sample Status.x Status.y Status
1:    100        Y       NA      3
2:    101                 Y      1
3:    102                        1
4:    103  partial               2
5:    106       NA        Y      3

I have no idea what the following line (before return) is supposed to do, and it will fail, but as it's not part of the question (yet), here goes.

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167