I've received a great answer of how to substitute a formula. I need it for matching a list of data.frames.
When matching with MatchIt::matchit()
, first, I have to save the result as a matchit.full / matchit
class. Second, with match.data()
data.frames w/ only matched observations are to be created.
The issue appears in the second step which is working fine when I use a formula as usual. For substitution now it seems that match.data()
needs to recognize the formula somehow but it won't.
Consider this as an example (the warnings can be ignored):
# example list
library(car)
WeightLoss1 <- WeightLoss
WeightLoss1$group <- as.integer(ifelse(WeightLoss1$group == "Control", 0, 1))
WL = list(WeightLoss1, WeightLoss1, WeightLoss1) # doesn't make much sense, but suffices for example
# substitute formula
wl.cov <- c("wl1", "se1")
WL.FM <- reformulate(wl.cov, response = "group")
# matching w/o substitution
m.match.0 <- lapply(1:length(WL), function(mark) {
require(MatchIt)
matchit(group ~ wl1 + se1, data = WL[[mark]])
})
# matching w/ substitution
m.match.1 <- lapply(1:length(WL), function(mark) {
require(MatchIt)
matchit(WL.FM, data = WL[[mark]])
})
# now compare both attempts to create list of data.frames
# w/o
match <- lapply(1:length(m.match.0), function(i){
require(MatchIt)
match.data(m.match.0[[i]])
})
# w/
match <- lapply(1:length(m.match.1), function(i){
require(MatchIt)
match.data(m.match.1[[i]])
})
As can be seen attempt w/o substitution is working fine, attempt w/ substitution produces an error Error in eval(object$call$data, envir = env) : object 'mark' not found
.
How could this be patched?
--
Notes:
> match.data
function (object, group = "all", distance = "distance", weights = "weights",
subclass = "subclass")
{
if (!is.null(object$model)) {
env <- attributes(terms(object$model))$.Environment
}
else {
env <- parent.frame()
}
data <- eval(object$call$data, envir = env)
treat <- object$treat
wt <- object$weights
vars <- names(data)
if (distance %in% vars)
stop("invalid input for distance. choose a different name.")
else if (!is.null(object$distance)) {
dta <- data.frame(cbind(data, object$distance))
names(dta) <- c(names(data), distance)
data <- dta
}
if (weights %in% vars)
stop("invalid input for weights. choose a different name.")
else if (!is.null(object$weights)) {
dta <- data.frame(cbind(data, object$weights))
names(dta) <- c(names(data), weights)
data <- dta
}
if (subclass %in% vars)
stop("invalid input for subclass. choose a different name.")
else if (!is.null(object$subclass)) {
dta <- data.frame(cbind(data, object$subclass))
names(dta) <- c(names(data), subclass)
data <- dta
}
if (group == "all")
return(data[wt > 0, ])
else if (group == "treat")
return(data[wt > 0 & treat == 1, ])
else if (group == "control")
return(data[wt > 0 & treat == 0, ])
else stop("error: invalid input for group.")
}
<bytecode: 0x00000000866125e0>
<environment: namespace:MatchIt>
> matchit
function (formula, data, method = "nearest", distance = "logit",
distance.options = list(), discard = "none", reestimate = FALSE,
...)
{
mcall <- match.call()
if (is.null(data))
stop("Dataframe must be specified", call. = FALSE)
if (!is.data.frame(data)) {
stop("Data must be a dataframe", call. = FALSE)
}
if (sum(is.na(data)) > 0)
stop("Missing values exist in the data")
ischar <- rep(0, dim(data)[2])
for (i in 1:dim(data)[2]) if (is.character(data[, i]))
data[, i] <- as.factor(data[, i])
if (!is.numeric(distance)) {
fn1 <- paste("distance2", distance, sep = "")
if (!exists(fn1))
stop(distance, "not supported.")
}
if (is.numeric(distance)) {
fn1 <- "distance2user"
}
fn2 <- paste("matchit2", method, sep = "")
if (!exists(fn2))
stop(method, "not supported.")
tryerror <- try(model.frame(formula), TRUE)
if (distance %in% c("GAMlogit", "GAMprobit", "GAMcloglog",
"GAMlog", "GAMcauchit")) {
requireNamespace("mgcv")
tt <- terms(mgcv::interpret.gam(formula)$fake.formula)
}
else {
tt <- terms(formula)
}
attr(tt, "intercept") <- 0
mf <- model.frame(tt, data)
treat <- model.response(mf)
X <- model.matrix(tt, data = mf)
if (method == "exact") {
distance <- out1 <- discarded <- NULL
if (!is.null(distance))
warning("distance is set to `NULL' when exact matching is used.")
}
else if (is.numeric(distance)) {
out1 <- NULL
discarded <- discard(treat, distance, discard, X)
}
else {
if (is.null(distance.options$formula))
distance.options$formula <- formula
if (is.null(distance.options$data))
distance.options$data <- data
out1 <- do.call(fn1, distance.options)
discarded <- discard(treat, out1$distance, discard, X)
if (reestimate) {
distance.options$data <- data[!discarded, ]
distance.options$weights <- distance.options$weights[!discarded]
tmp <- out1
out1 <- do.call(fn1, distance.options)
tmp$distance[!discarded] <- out1$distance
out1$distance <- tmp$distance
}
distance <- out1$distance
}
if (fn1 == "distance2mahalanobis") {
is.full.mahalanobis <- TRUE
}
else {
is.full.mahalanobis <- FALSE
}
out2 <- do.call(fn2, list(treat, X, data, distance = distance,
discarded, is.full.mahalanobis = is.full.mahalanobis,
...))
if (fn1 == "distance2mahalanobis") {
distance[1:length(distance)] <- NA
class(out2) <- c("matchit.mahalanobis", "matchit")
}
out2$call <- mcall
out2$model <- out1$model
out2$formula <- formula
out2$treat <- treat
if (is.null(out2$X)) {
out2$X <- X
}
out2$distance <- distance
out2$discarded <- discarded
nn <- matrix(0, ncol = 2, nrow = 4)
nn[1, ] <- c(sum(out2$treat == 0), sum(out2$treat == 1))
nn[2, ] <- c(sum(out2$treat == 0 & out2$weights > 0), sum(out2$treat ==
1 & out2$weights > 0))
nn[3, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded ==
0), sum(out2$treat == 1 & out2$weights == 0 & out2$discarded ==
0))
nn[4, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded ==
1), sum(out2$treat == 1 & out2$weights == 0 & out2$discarded ==
1))
dimnames(nn) <- list(c("All", "Matched", "Unmatched", "Discarded"),
c("Control", "Treated"))
out2$nn <- nn
return(out2)
}
<bytecode: 0x0000000086d6e158>
<environment: namespace:MatchIt>