0

I am new to R and I got a hold of this program I am trying to run. But I am getting error in the variable "outcome01". It could be that if I (somehow) fix this variable, there would be other similar errors. Any help is appreciated; Here is the code:

library(norm)
# Make appropriate changes in file and variable names.
setwd ("c:\\Users\\Dave Desktop\\Dropbox\\Webs\\StatPages\\More_Stuff\\Missing_Data")
x <- read.table("survrateMissingNA.dat", header = TRUE)
y <- as.matrix(x)  #convert table to matrix
cat("Logistic regression using 132 cases and missing data \n\n")
print(summary(glm(formula = outcome01~survrate + gsi + avoid + intrus, binomial, data = x)))  #Use original data with missing values
attach(x)


##########
## Important  The following code will run m = 5 times. The data will be concatenated into ComFile and then analyzed.


#  Data Augmentation using norm.R
m <- 5    #Number of imputations
k <- 9    #Number of variables in raw data file
l <- 5    #Number of variables actually used in regression
CombFile <- matrix(nrow = 0, ncol = k)

for (i in 1:m) {
   s <- prelim.norm(y)  #get preliminary statistics for the analysis
   thetahat <-em.norm(s)  #Get MLE for start value
   rngseed(25672)
   theta <- da.norm(s, thetahat, steps=200, showits=TRUE) # GET MLE
   getparam.norm(s, theta) #Print out those results
   impdata <-imp.norm(s, theta, y)  #Impute the data
   filename <- paste("CombFile", i, sep = "")
   CombFile <- rbind(CombFile, impdata)
   write(t(impdata), file = "impsurvrate", ncolumns = 9, sep = " ")
   z <- data.frame(impdata)
   z$outcome01 <- round(z$outcome01, digits = 0)
   summary((glm(formula = outcome01~survrate + gsi + avoid + intrus, binomial, data = z)))  #Use imputed data.
}

## Creating the final data file with imputed data  660 rows
nPerImp <- nrow(CombFile)/m
imps <- rep(1:m, each = nPerImp)
# Add a variable representing the imputation number.
data <- as.data.frame(cbind(imps, CombFile))
data$outcome01 <- round(data$outcome01, digits = 0)
# head(data)
attach(data)

## Set up variables to hold results
b <- matrix(NA,nrow = m, ncol = 2*l)
meanb <- numeric(l)
meanvar <- numeric(l)
varb <- numeric(l)
TT <- numeric(l)
sqrtt <- numeric(l)
t <- numeric(l)

## Run a logistic regression on each of the 5 imputed data sets and store the
## coefficients and theire standard errors.
for (i in 1:m) {         # Modify following line appropriately
       model <- glm(outcome01~survrate + gsi + avoid + intrus ,subset = (imps ==i), binomial, data = data)
       a <- summary(model)
       # print(a)
       n <- 2*l
       b[i,] <-  a$coefficients[1:n]
}

##  Calculate the coefficients, st. errors, and t values across 5 imputations
for (i in 1:l) {
  meanb[i] <- mean(b[,i])
  meanvar[i] <- mean((b[,i+l]^2))
  varb[i] <- var(b[,i])
  }

  cat("\n\n\nThe mean regression coefficients are: \n\n")
  print(meanb)


for (i in 1:l) {
  TT[i] <- meanvar[i] + (1 + 1/5)*varb[i]
  sqrtt[i] <- sqrt(TT[i])
  t[i] <- meanb[i]/sqrtt[i]
  }
cat("The standard errors are: \n\n")
print(sqrtt)
cat("\n The t values are: \n\n")
print(t)

Here is the data ( in survrateMissingNA.dat file):

c(1, 4.405, 17.2, 31.144, 491, 1029, 61, 20.2, 999, 2, 8.963, 
17.6, 47.951, 445, 934, 32, 21, 3.85, 3, 4.778, 19.3, 32.175, 
448, 944, 27, 21.1, 3.296, 4, 999, 17.1, 28.934, 482, 1005, 66, 
20.3, 1.792, 5, 4.992, 24, 41.078, 417, 902, 11, 21, 3.807, 6, 
5.443, 18.4, 34.571, 462, 980, 62, 21.5, 3.367, 7, 8.817, 14.4, 
999, 431, 908, 3, 21.7, 4.394, 8, 7.03, 16.6, 39.076, 429, 897, 
3, 21, 4.22, 9, 5.718, 19.1, 32.588, 420, 889, 36, 20.7, 3.871, 
10, 5.193, 16.3, 32.291, 406, 854, 16, 20.2, 4.174, 11, 6.078, 
17.9, 38.518, 407, 889, 17, 21.6, 999, 12, 4.21, 19.1, 29.783, 
468, 979, 62, 21.4, 2.708, 13, 6.136, 17.3, 39.431, 488, 1048, 
69, 21.2, 2.565, 14, 5.826, 17.5, 36.785, 415, 882, 19, 21.2, 
4.06, 15, 5.483, 15.8, 31.511, 516, 1099, 64, 22.1, 1.609, 16, 
5.817, 15.1, 34.652, 503, 1060, 74, 21.7, 2.197, 17, 999, 17, 
32.257, 477, 999, 65, 20.1, 2.398, 18, 4.761, 999, 26.461, 486, 
1021, 80, 19.4, 2.197, 19, 6.428, 13.8, 31.972, 427, 896, 2, 
21.5, 4.22, 20, 7.245, 17, 40.661, 430, 909, 11, 20.7, 4.159, 
21, 7.287, 14.8, 40.795, 430, 907, 6, 21.6, 4.382, 22, 6.994, 
20.1, 41.895, 484, 1033, 68, 21.3, 999, 23, 6, 17.5, 35.948, 
506, 1085, 60, 22.1, 2.197, 24, 4.08, 17.5, 26.818, 496, 1036, 
79, 18.7, 1.386, 25, 5.383, 15.5, 31.189, 495, 1045, 64, 21.5, 
2.197, 26, 5.692, 16.3, 28.785, 473, 1009, 55, 21.9, 3.045, 27, 
5.935, 14.5, 30.922, 494, 1050, 73, 21.7, 2.197, 28, 999, 18.7, 
34.836, 434, 917, 39, 21.3, 3.401, 29, 5.859, 15.6, 999, 444, 
935, 4, 22.3, 999, 30, 9.774, 13.8, 46.087, 420, 898, 3, 20.8, 
4.248, 31, 4.586, 17.2, 28.493, 485, 1015, 59, 20.3, 2.398, 32, 
9.623, 15.2, 47.612, 419, 892, 16, 21.9, 4.304, 33, 5.077, 16.2, 
30.793, 411, 865, 11, 19.3, 4.094, 34, 4.775, 15.3, 26.327, 515, 
1107, 78, 21.4, 1.609, 35, 6.162, 16.6, 36.802, 460, 975, 60, 
21.3, 3.135, 36, 4.845, 15.5, 28.172, 491, 1027, 66, 20.6, 2.197, 
37, 6.436, 19.9, 38.555, 448, 947, 12, 22.3, 3.932, 38, 7.109, 
17.1, 999, 419, 880, 8, 21, 4.248, 39, 999, 14.7, 40.729, 425, 
888, 2, 21.4, 4.248, 40, 4.797, 16.4, 30.279, 401, 844, 13, 18.9, 
4.06, 41, 4.775, 14.4, 25.994, 505, 1068, 68, 21.3, 1.609, 42, 
4.388, 18.6, 32.477, 497, 1040, 83, 19.7, 2.485, 43, 5.222, 15.7, 
31.223, 419, 893, 30, 20.2, 3.85, 44, 3.656, 24.3, 29.082, 513, 
1076, 69, 21.5, 1.386, 45, 6.75, 13.8, 35.406, 429, 901, 7, 21.9, 
4.22, 46, 5.327, 14.6, 33.987, 428, 896, 6, 20.7, 999, 47, 5.906, 
20.2, 36.151, 443, 937, 16, 22.4, 3.871, 48, 6.107, 14.8, 31.944, 
448, 932, 57, 20, 2.833, 49, 6.93, 15.9, 37.746, 501, 1073, 64, 
22.3, 2.197, 50, 6.16, 14.9, 31.285, 476, 1001, 70, 21.4, 2.303) 
yizzlez
  • 8,757
  • 4
  • 29
  • 44
Seif
  • 1
  • 1
  • (somebody please reject my edit since I cannot ... it was made too early and does not help the question) – r2evans Apr 18 '14 at 21:12
  • You're close to a good question, but it is not reproducible as it stands (we don't know column headers, for one). Please read [reproducible code](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) and revise. – r2evans Apr 18 '14 at 21:13

1 Answers1

0

Can you please be a bit more specific with your question, in particular, where in the code does the error occur?

With the survrateMissingNA.dat, are there any headers on it, or does it appear exactly as you posted it?

If there are no headers, this line of code will give you an error straight away: print(summary(glm(formula = outcome01~survrate + gsi + avoid + intrus, binomial, data = x)))

That line is reading data from the data.frame x, which contains the survrateMissingNA.dat data. If that data file has no headers, then outcome01 does not exist. (not does survrate, gsi avoid orintrus).

To fix this (initial error), make sure that the file contains headings.

Pash101
  • 631
  • 3
  • 14