I am new to R and I got a hold of this program I am trying to run. But I am getting error in the variable "outcome01". It could be that if I (somehow) fix this variable, there would be other similar errors. Any help is appreciated; Here is the code:
library(norm)
# Make appropriate changes in file and variable names.
setwd ("c:\\Users\\Dave Desktop\\Dropbox\\Webs\\StatPages\\More_Stuff\\Missing_Data")
x <- read.table("survrateMissingNA.dat", header = TRUE)
y <- as.matrix(x) #convert table to matrix
cat("Logistic regression using 132 cases and missing data \n\n")
print(summary(glm(formula = outcome01~survrate + gsi + avoid + intrus, binomial, data = x))) #Use original data with missing values
attach(x)
##########
## Important The following code will run m = 5 times. The data will be concatenated into ComFile and then analyzed.
# Data Augmentation using norm.R
m <- 5 #Number of imputations
k <- 9 #Number of variables in raw data file
l <- 5 #Number of variables actually used in regression
CombFile <- matrix(nrow = 0, ncol = k)
for (i in 1:m) {
s <- prelim.norm(y) #get preliminary statistics for the analysis
thetahat <-em.norm(s) #Get MLE for start value
rngseed(25672)
theta <- da.norm(s, thetahat, steps=200, showits=TRUE) # GET MLE
getparam.norm(s, theta) #Print out those results
impdata <-imp.norm(s, theta, y) #Impute the data
filename <- paste("CombFile", i, sep = "")
CombFile <- rbind(CombFile, impdata)
write(t(impdata), file = "impsurvrate", ncolumns = 9, sep = " ")
z <- data.frame(impdata)
z$outcome01 <- round(z$outcome01, digits = 0)
summary((glm(formula = outcome01~survrate + gsi + avoid + intrus, binomial, data = z))) #Use imputed data.
}
## Creating the final data file with imputed data 660 rows
nPerImp <- nrow(CombFile)/m
imps <- rep(1:m, each = nPerImp)
# Add a variable representing the imputation number.
data <- as.data.frame(cbind(imps, CombFile))
data$outcome01 <- round(data$outcome01, digits = 0)
# head(data)
attach(data)
## Set up variables to hold results
b <- matrix(NA,nrow = m, ncol = 2*l)
meanb <- numeric(l)
meanvar <- numeric(l)
varb <- numeric(l)
TT <- numeric(l)
sqrtt <- numeric(l)
t <- numeric(l)
## Run a logistic regression on each of the 5 imputed data sets and store the
## coefficients and theire standard errors.
for (i in 1:m) { # Modify following line appropriately
model <- glm(outcome01~survrate + gsi + avoid + intrus ,subset = (imps ==i), binomial, data = data)
a <- summary(model)
# print(a)
n <- 2*l
b[i,] <- a$coefficients[1:n]
}
## Calculate the coefficients, st. errors, and t values across 5 imputations
for (i in 1:l) {
meanb[i] <- mean(b[,i])
meanvar[i] <- mean((b[,i+l]^2))
varb[i] <- var(b[,i])
}
cat("\n\n\nThe mean regression coefficients are: \n\n")
print(meanb)
for (i in 1:l) {
TT[i] <- meanvar[i] + (1 + 1/5)*varb[i]
sqrtt[i] <- sqrt(TT[i])
t[i] <- meanb[i]/sqrtt[i]
}
cat("The standard errors are: \n\n")
print(sqrtt)
cat("\n The t values are: \n\n")
print(t)
Here is the data ( in survrateMissingNA.dat file):
c(1, 4.405, 17.2, 31.144, 491, 1029, 61, 20.2, 999, 2, 8.963,
17.6, 47.951, 445, 934, 32, 21, 3.85, 3, 4.778, 19.3, 32.175,
448, 944, 27, 21.1, 3.296, 4, 999, 17.1, 28.934, 482, 1005, 66,
20.3, 1.792, 5, 4.992, 24, 41.078, 417, 902, 11, 21, 3.807, 6,
5.443, 18.4, 34.571, 462, 980, 62, 21.5, 3.367, 7, 8.817, 14.4,
999, 431, 908, 3, 21.7, 4.394, 8, 7.03, 16.6, 39.076, 429, 897,
3, 21, 4.22, 9, 5.718, 19.1, 32.588, 420, 889, 36, 20.7, 3.871,
10, 5.193, 16.3, 32.291, 406, 854, 16, 20.2, 4.174, 11, 6.078,
17.9, 38.518, 407, 889, 17, 21.6, 999, 12, 4.21, 19.1, 29.783,
468, 979, 62, 21.4, 2.708, 13, 6.136, 17.3, 39.431, 488, 1048,
69, 21.2, 2.565, 14, 5.826, 17.5, 36.785, 415, 882, 19, 21.2,
4.06, 15, 5.483, 15.8, 31.511, 516, 1099, 64, 22.1, 1.609, 16,
5.817, 15.1, 34.652, 503, 1060, 74, 21.7, 2.197, 17, 999, 17,
32.257, 477, 999, 65, 20.1, 2.398, 18, 4.761, 999, 26.461, 486,
1021, 80, 19.4, 2.197, 19, 6.428, 13.8, 31.972, 427, 896, 2,
21.5, 4.22, 20, 7.245, 17, 40.661, 430, 909, 11, 20.7, 4.159,
21, 7.287, 14.8, 40.795, 430, 907, 6, 21.6, 4.382, 22, 6.994,
20.1, 41.895, 484, 1033, 68, 21.3, 999, 23, 6, 17.5, 35.948,
506, 1085, 60, 22.1, 2.197, 24, 4.08, 17.5, 26.818, 496, 1036,
79, 18.7, 1.386, 25, 5.383, 15.5, 31.189, 495, 1045, 64, 21.5,
2.197, 26, 5.692, 16.3, 28.785, 473, 1009, 55, 21.9, 3.045, 27,
5.935, 14.5, 30.922, 494, 1050, 73, 21.7, 2.197, 28, 999, 18.7,
34.836, 434, 917, 39, 21.3, 3.401, 29, 5.859, 15.6, 999, 444,
935, 4, 22.3, 999, 30, 9.774, 13.8, 46.087, 420, 898, 3, 20.8,
4.248, 31, 4.586, 17.2, 28.493, 485, 1015, 59, 20.3, 2.398, 32,
9.623, 15.2, 47.612, 419, 892, 16, 21.9, 4.304, 33, 5.077, 16.2,
30.793, 411, 865, 11, 19.3, 4.094, 34, 4.775, 15.3, 26.327, 515,
1107, 78, 21.4, 1.609, 35, 6.162, 16.6, 36.802, 460, 975, 60,
21.3, 3.135, 36, 4.845, 15.5, 28.172, 491, 1027, 66, 20.6, 2.197,
37, 6.436, 19.9, 38.555, 448, 947, 12, 22.3, 3.932, 38, 7.109,
17.1, 999, 419, 880, 8, 21, 4.248, 39, 999, 14.7, 40.729, 425,
888, 2, 21.4, 4.248, 40, 4.797, 16.4, 30.279, 401, 844, 13, 18.9,
4.06, 41, 4.775, 14.4, 25.994, 505, 1068, 68, 21.3, 1.609, 42,
4.388, 18.6, 32.477, 497, 1040, 83, 19.7, 2.485, 43, 5.222, 15.7,
31.223, 419, 893, 30, 20.2, 3.85, 44, 3.656, 24.3, 29.082, 513,
1076, 69, 21.5, 1.386, 45, 6.75, 13.8, 35.406, 429, 901, 7, 21.9,
4.22, 46, 5.327, 14.6, 33.987, 428, 896, 6, 20.7, 999, 47, 5.906,
20.2, 36.151, 443, 937, 16, 22.4, 3.871, 48, 6.107, 14.8, 31.944,
448, 932, 57, 20, 2.833, 49, 6.93, 15.9, 37.746, 501, 1073, 64,
22.3, 2.197, 50, 6.16, 14.9, 31.285, 476, 1001, 70, 21.4, 2.303)