I want to update join two tables within a function. Here is an example without using a function:
library(data.table)
Xtest <- data.table(a = rnorm(20), b = rnorm(20), c = 1:20)
Ytest <- data.table(c = 1:10, d = rnorm(10))
Xtest[Ytest, on = .(c), newcol := i.d]
# > Xtest[Ytest, on = .(c), newcol := i.d]
# > Xtest
# a b c newcol
# 1: -1.68473343 -0.74498296 1 0.35096663
# 2: -0.98461614 2.15317525 2 -1.33890396
# 3: -1.65427602 1.21183896 3 1.49641480
# 4: -0.65045253 -0.74609860 4 -0.03227097
# 5: 1.49058508 1.20315276 5 1.41580186
# 6: -0.31631871 0.68716871 6 -0.03671959
# 7: 1.35923085 -0.20082238 7 -2.27959124
# 8: -0.75649545 0.24058212 8 0.93770862
# 9: 0.22452260 -0.28212892 9 -0.02500419
# 10: 0.30209786 1.33697797 10 0.67729741
# 11: 0.88748221 -0.54421418 11 NA
# 12: 0.47207422 -0.28159382 12 NA
# 13: -1.17270475 0.83940750 13 NA
# 14: -2.02787820 -0.03672582 14 NA
# 15: -0.22187761 0.59137210 15 NA
# 16: 0.97750330 -0.27030756 16 NA
# 17: 0.22725940 0.54617488 17 NA
# 18: 0.94065525 -0.23482152 18 NA
# 19: 2.12049977 0.69920776 19 NA
# 20: 0.06192823 0.12262739 20 NA
# Xtest[, newcol := NULL]
I tried to recast the above code into a function, but the Ycol argument seems to be hidden:
myjoinfunction <- function(X, Y, joinlist, newcol, Ycol) {
eval(substitute(X[Y, on = joinlist, newcol:=i.Ycol]))
}
# > myjoinfunction <- function(X, Y, joinlist, newcol, Ycol) {
# + eval(substitute(X[Y, on = joinlist, newcol:=i.Ycol]))
# + }
# > myjoinfunction(Xtest, Ytest, list(c), D, d)
# Error in eval(jsub, SDenv, parent.frame()) : object 'i.Ycol' not found
# 8.
# eval(jsub, SDenv, parent.frame())
# 7.
# eval(jsub, SDenv, parent.frame())
# 6.
# `[.data.table`(Xtest, Ytest, on = list(c), `:=`(D, i.Ycol))
# 5.
# Xtest[Ytest, on = list(c), `:=`(D, i.Ycol)]
# 4.
# eval(substitute(X[Y, on = joinlist, `:=`(newcol, i.Ycol)]))
# 3.
# eval(substitute(X[Y, on = joinlist, `:=`(newcol, i.Ycol)]))
# 2.
# eval(substitute(X[Y, on = joinlist, `:=`(newcol, i.Ycol)]))
# 1.
# myjoinfunction(Xtest, Ytest, list(c), D, d)
How can I make the Ycol argument visible within the function?
UPDATED EXAMPLE
To be more general, here is a new example of an update join I would like to write a function for:
Xtest <- data.table(
a = rnorm(20),
b = rnorm(20),
c = rep(letters[1:4], rep(5, 4)),
d = rep(1:5, 4)
)
Ytest <- data.table(
c = rep(letters[1:2], rep(5, 2)),
d = rep(1:5, 2),
e = rnorm(10),
f = rnorm(10)
)
# > Xtest[Ytest, on = .(c, d), `:=`(newcol1 = i.e, newcol2 = i.f) ]
# > Xtest
# a b c d newcol1 newcol2
# 1: -2.4939743 -0.200370619 a 1 -1.4934893 -1.0288955
# 2: 1.0188321 -1.182286508 a 2 1.3811712 0.9747131
# 3: 0.5217161 -0.152117649 a 3 -0.4168069 0.1218213
# 4: -0.1584167 0.583640353 a 4 0.4644738 1.7888567
# 5: -0.4271398 0.020067301 a 5 2.5279998 2.0919953
# 6: -1.7692909 0.250129040 b 1 -1.5964246 -1.0884861
# 7: -0.8899915 0.971742055 b 2 0.3011304 1.2629524
# 8: -0.4490363 -1.540005621 b 3 -0.7992208 -0.5155775
# 9: -0.5706488 -1.037077614 b 4 1.0058213 1.9787692
# 10: -0.0922679 1.444487848 b 5 -0.2893311 -0.6095043
# 11: 0.9924810 -1.144513228 c 1 NA NA
# 12: 1.2232591 1.503649791 c 2 NA NA
# 13: 0.8751961 0.892765910 c 3 NA NA
# 14: 0.9960554 0.499310073 c 4 NA NA
# 15: -0.6184695 1.867985589 c 5 NA NA
# 16: 0.6503936 0.422683211 d 1 NA NA
# 17: -0.6160834 -1.585713893 d 2 NA NA
# 18: 1.5949931 -0.544704857 d 3 NA NA
# 19: 0.7232079 -0.006460518 d 4 NA NA
# 20: -0.2824961 0.119585859 d 5 NA NA
Using a suggestion from David Arenburg in the comments trips up when joinlist is a list of strings
myjoinfunction1 <- function(X, Y, joinlist, newcol, Ycol) X[Y, on = joinlist, newcol:= get(paste0("i.", Ycol))]
# > myjoinfunction1(Xtest, Ytest, list("c", "d"), "newcol", "e")
# Error in .parse_on(substitute(on), isnull_inames) :
# 'on' argument should be a named atomic vector of column names indicating which columns in 'i' should be joined with which columns in 'x'.
# 5.
# stop("'on' argument should be a named atomic vector of column names indicating which columns in 'i' should be joined with which columns in 'x'.")
# 4.
# .parse_on(substitute(on), isnull_inames)
# 3.
# `[.data.table`(X, Y, on = joinlist, `:=`(newcol, get(paste0("i.",
# Ycol))))
# 2.
# X[Y, on = joinlist, `:=`(newcol, get(paste0("i.", Ycol)))]
# 1.
# myjoinfunction1(Xtest, Ytest, list("c", "d"), "newcol", "e")