Hi I'm trying to simulate and fit this data. But, there is an error. How can i fit it?
n=500
category=3
time=5
# group probability
p <- 0.5
# group covariates
group_rows <- 500
group_cols <-1
# group_i data generating
group_i <- matrix(rbinom(n, size = 1, prob = p), nrow= group_rows, ncol=group_cols)
group_i
head(group_i)
# time covariates
time_rows <- 500
time_cols <- 5
# N(0,1)
time_it <- matrix(rnorm(time_rows * time_cols), nrow = time_rows, ncol = time_cols)
#random effect data generating
random_rows <- 500
random_cols <-1
# N(0,1)
b_i0 <- matrix(rnorm(random_rows*random_cols, sd=0.1), nrow=random_rows, ncol=random_cols)
# true parameter
beta0 <- c(-0.7, 0.665)
beta1 <- -2
beta2 <- 1
# generating y
y <- array(0, dim = c(5, 2, 500))
# matrix
for (i in 1:n) {
for (k in 1:category-1) {
for (t in 1:time) {
y[t, k, i] <- beta0[k] + beta1 * group_i[i, 1] + beta2 * time_it[i, t]+b_i0[i,1]
}
}
}
# sigmoid
sigmoid <- function(x) {
return(exp(x)/(1+exp(x)))
}
# sigmoid transformation
prob_y <- y
for (i in 1:n) {
for (k in 1:category-1) {
for (t in 1:time) {
prob_y[t, k, i] <- sigmoid(y[t, k, i])
}
}
}
# print
first <- prob_y[,1,]
second <- prob_y[,2,]-prob_y[,1,]
third <- 1-prob_y[,2,]
pie <- array(c(first, second, third), dim = c(5, 3, 500))
# pie
trial <- array(0, dim=c(5,3,500))
t(rmultinom(1,1,pie[1,,1]))
for (i in 1:n){
for(t in 1:time){trial[t,,i] <- t(rmultinom(1,1,pie[t,,i]))}
}
y_cat <- matrix(0, nrow=500, ncol=5)
for(i in 1:n){
for(t in 1:time){if(trial[t,1,i]==1){y_cat[i,t]='A'} else if(trial[t,2,i]==1){y_cat[i,t]='B'}
else if(trial[t,3,i]==1){y_cat[i,t]='C'}
}
}
y_cat <- as.vector(t(y_cat))
y_cat
cova <- cbind(group_i, b_i0)
cova_it <- matrix(0, nrow=2500, ncol=2)
for (i in 1:n) {
for(k in 1:2){
start_idx <- 5*i-4
end_idx <- 5*i
cova_it[start_idx:end_idx,k ] <- cova[i,k ]
}
}
time_it <- as.vector(t(time_it))
t <- rep(1:5, 500)
data <- cbind(cova_it, t)
id <- rep(1:500, each=5)
data <- cbind(id, y_cat, cova_it,t )
data <- cbind(data, time_it)
data <- data.frame(data)
names(data) = c("id", "y_itk", "group_i", "b_i0", "t", "time_it")
data$y_itk <- factor(data$y_itk, ordered=TRUE, levels=c("A","B","C"))
data$id <- as.numeric(data$id)
data$group_i <- as.factor(data$group_i)
data$b_i0 <- as.numeric(data$b_i0)
data$t <- as.factor(data$t)
data$time_it <- as.numeric(data$time_it)
> head(data,10)
id y_itk group_i b_i0 t time_it
1 1 B 0 0.17976949 1 1.32954947
2 1 C 0 0.17976949 2 0.67217515
3 1 C 0 0.17976949 3 -0.42003656
4 1 A 0 0.17976949 4 0.82198396
5 1 C 0 0.17976949 5 0.96039400
6 2 A 1 -0.10953880 1 -0.33726049
7 2 A 1 -0.10953880 2 1.06015261
8 2 C 1 -0.10953880 3 -0.24649623
9 2 B 1 -0.10953880 4 0.31253911
10 2 B 1 -0.10953880 5 -0.73423100
fit7 <- polr(formula=y_itk~time_it+group_i+(1|id), data=data, HESS=TRUE) Error in fn(par, ...) : unused argument (HESS = TRUE) In addition: Warning message: In polr(formula = y_itk ~ time_it + group_i + (1 | id), data = data, : design appears to be rank-deficient, so dropping some coefs
I've tried to analyze this data with clmm2 and it's working. But, with polr and vglm, both function are not working because of rank deficient.