0

I'm aware of the discussion around getting confidence/prediction intervals from lme fits, as discussed in Extract prediction band from lme fit and http://glmm.wikidot.com/faq.

However, I have a model that includes random effects and a variance structure but the confidence/prediction intervals obtained by the procedure cited above don't seem to take the variance structure into consideration.

Does the procedure above consider the weights of the variance structure OR we have to consider it manually?

For an example, see the code below

# generating random data
set.seed(20161112)

repetition_per_cell = 5
ncells_per_age = 75

age = c(rep(1, ncells_per_age*repetition_per_cell), rep(2, ncells_per_age*repetition_per_cell), rep(3, ncells_per_age*repetition_per_cell)) # group age
angle.raw = runif(ncells_per_age*3)*90 # random angles
length.raw = c( rnorm(n = ncells_per_age, mean = 130, sd = 15), rnorm(n = ncells_per_age, mean = 100, sd = 10), rnorm(n = ncells_per_age, mean = 25, sd = 5)) # length is a characteristic of the cell, hence it doesn't change accordingly to the repetition

# here I generate the random cells and I group the length and angle for each cell
j = 1
k = 1
cell = 1
length = 1
angle = 1
for (i in 1:(ncells_per_age*repetition_per_cell*3)) {
  cell[i] = j
  length[i] = length.raw[j]
  angle[i] = angle.raw[j]
  k = k + 1

  if (k == (repetition_per_cell + 1)){
    k = 1
    j = j +1
  }
}


# effects of the parameters
effect_Age = c(50, 75, 100)
effect_Angle = c(1, 1.2, 0.1)
effect_Length = c(-0.05, -0.001, -0.02)
sd_ages = c(3, 1, 5)
sd_angle = c(0.5, 1.75, 2)
cell_sd = 3

j = 1
effect = 0
for (i in 1:(ncells_per_age*repetition_per_cell*3)){
  if (j == 1){
      random_effect_cell = rnorm(n = 1, mean = 0, sd = cell_sd)
  }
  agecode = age[i]
  effect[i] = effect_Age[agecode] + angle[i]*effect_Angle[agecode] + length[i]*effect_Length[agecode] + random_effect_cell + rnorm(n = 1, mean = 0, sd = sd_ages[agecode]*exp(sd_angle[agecode]*angle[i]/90) )

  j = j + 1
  if (j == (repetition_per_cell + 1)){
    j = 1
  }
}

data_test = data.frame(effect, angle, age, cell, length)
data_test$age = as.factor(age)


plot(data_test$angle, data_test$effect, col = data_test$age)

# running the mixed-effect model
library(nlme)
model.lme = lme(effect ~ age*angle + length, random = ~1|cell, weights = varComb(varExp(form =~ angle|age), varIdent(form =~ 1|age)), data=data_test, method = "REML")
summary(model.lme)



# getting confidence/prediction interval
length.means = c(mean(data_test$length[data_test$age == '1']), mean(data_test$length[data_test$age == '2']), mean(data_test$length[data_test$age == '3']))
new.data = expand.grid(age = c('1', '2', '3'), length = length.means, angle = seq(from = 0, to = 90, by = 0.5))

# now we need to remove the lengths from age 2 and 3 from age '1', and the same for others age groups
# removing what doesn't make sense
new.data = new.data[new.data$age == '1' & new.data$length == length.means[1] | new.data$age == '2' & new.data$length == length.means[2] | new.data$age == '3' & new.data$length == length.means[3], ]

new.data$pred = predict(model.lme, new.data, level = 0)

Designmat <- model.matrix(formula(model.lme)[-2], new.data)
predvar <- diag(Designmat %*% vcov(model.lme) %*% t(Designmat))
new.data$SE <- sqrt(predvar)
new.data$SE2 <- sqrt(predvar + model.lme$sigma^2)


# plotting
plot(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'], type = 'l', col = 1, ylim = c(0,200), ylab='effect', xlab='angle')
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'], col = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'], col = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE2[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE2[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE2[new.data$age == '3'], col = 3, lty = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE2[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE2[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE2[new.data$age == '3'], col = 3, lty = 3)

points(data_test$angle, data_test$effect, col = data_test$age)

This is the output that we get.

enter image description here

We can see that the simulated data have a variance that increases with angle but we don't see the same using the confidence/prediction interval calculated using the method above.

So, I tried to include the change in variance by multiplying the predicted variance by the variance structure predicted by lme with the code below.

# plotting with supposed correction
# values from summary(model.lme)
jump_age = c(1.0000000, 0.3720868, 1.7763930)
exp_age = c(0.004880414, 0.016409874, 0.022219648)
new.data$SE.b <- sqrt(predvar)*jump_age[as.numeric(new.data$age)]*exp(exp_age[as.numeric(new.data$age)]*new.data$angle)
new.data$SE2.b <- sqrt(predvar + model.lme$sigma^2)*jump_age[as.numeric(new.data$age)]*exp(exp_age[as.numeric(new.data$age)]*new.data$angle)

plot(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'], type = 'l', col = 1, ylim = c(0,200), ylab='effect', xlab='angle')
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'], col = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'], col = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE.b[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE.b[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE.b[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE.b[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE.b[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE.b[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE2.b[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE2.b[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE2.b[new.data$age == '3'], col = 3, lty = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE2.b[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE2.b[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE2.b[new.data$age == '3'], col = 3, lty = 3)

points(data_test$angle, data_test$effect, col = data_test$age)

The output below seems to fit the data better but I'm not sure if that was just coincidence. Is that the right approach to calculate confidence/prediction bands for lme models that include variance structures?

enter image description here

Community
  • 1
  • 1
  • 1
    Please provide the data and code for a Minimal, Complete, Verifiable Example. http://stackoverflow.com/help/mcve That way we can see what you mean by "look strange". However, if you feel this is a purely statistical question that doesn't require an MCVE then you should migrate it to Cross Validated (http://stats.stackexchange.com). – Hack-R Nov 12 '16 at 03:14
  • @Hack-R I added what you asked. Any ideas? – Hugo Fernando Maia Milan Nov 12 '16 at 07:23

0 Answers0