I am using optimr to minimize a function that calculates the negative log likelihood for choice behavior. On some iterations, I receive the error message:
`
Error in optim(par = par, fn = efn, gr = egr, lower = lower, upper = upper, :
L-BFGS-B needs finite values of 'fn'
`
I am having trouble determining which starting values produce this error and how to effectively avoid them when setting the bounds for optimr.
Function to be optimized:
`
new_loglikelihood_ACL <- function(params,rep,val){
alpha_param <- invlogit(params[1])
phi_param <- invlogit(params[2])
choiceProb_Full <- c()
Block_Num <- 1
p3 <- 1 - phi_param
for(i in 1:N_blocks){
choiceProb <- c()
Val_DF <- data.frame(val[[Block_Num]])
Rep_DF <- rep %>% filter(Block == Block_Num)
bs <- c(Rep_DF$displayed_stim)
choice <- c(Rep_DF$a)
outcome <- c(Rep_DF$r)
tf <- c(Rep_DF$tf)
dimension_hint <- ifelse(tf[1] == 'yellow', 'color',
ifelse(tf[1] == 'blue', 'color',
ifelse(tf[1] == 'orange', 'color', 'shape')))
phi_color <- ifelse(dimension_hint == 'color',phi_param,p3) # allocates attention weighting based on relevant dimension
phi_shape <- ifelse(dimension_hint == 'shape',phi_param,p3) # allocates attention weighting based on relevant dimension
Trial_Num <- 1
for(i in 1:num_trials_per_block){
b <- as.numeric(unlist(strsplit(bs[Trial_Num],",")))
bandits_df <- subset(Val_DF, stimuli_num == b[1] | stimuli_num == b[2] | stimuli_num == b[3])
Q_bandits <- (bandits_df$v_shape * phi_shape) + (bandits_df$v_color * phi_color)
p_num <- exp(beta_test*Q_bandits)
p <- p_num / sum(p_num)
c <- choice[Trial_Num]
cp <- p[c]
choiceProb <- c(choiceProb,cp)
color_num <- subset(Val_DF, features_color == bandits_df$features_color[c])$stimuli_num
shape_num <- subset(Val_DF, features_shape == bandits_df$features_shape[c])$stimuli_num
# Updating phase
Val_DF$v_shape[shape_num] <- bandits_df$v_shape[c] + alpha_param * (outcome[Trial_Num] - Q_bandits[c]) * phi_shape # computing new value for chosen features based on reward outcome; tailored by alpha (learning rate), delta (RPE), and phi shape (attention weighting)
Val_DF$v_color[color_num] <- bandits_df$v_color[c] + alpha_param * (outcome[Trial_Num] - Q_bandits[c]) * phi_color # computing new value for chosen features based on reward outcome; tailored by alpha (learning rate), delta (RPE), and phi color (attention weighting)
Trial_Num <- Trial_Num + 1
}
choiceProb_Full <- c(choiceProb_Full,choiceProb)
Block_Num <- Block_Num + 1
}
NegLL <- -1 * sum(log(choiceProb_Full))
}
`
Sample data for reproducing the error and the call to optimr which results in the error:
`
create_val_df <- function(){
Q_shapes = c(0.1666667,0.1666667,0.1666667) # start with a value for each feature (1-6) ---> q is usually the value of an action; V is equal to the value of the stimuli.
v_shape = rep(Q_shapes,each=3) # hacky way to line them up and recreate the dataframe commented above
Q_color = c(0.1666667,0.1666667,0.1666667)
Q_stimuli = as.data.frame(v_shape)
Q_stimuli$v_color = c(Q_color)
Q_stimuli$features = c("yellow, circle","yellow, oval","yellow, square",
"blue, circle","blue, oval","blue, square","orange, circle",
"orange, oval","orange, square")
Q_stimuli$features_color <- c("yellow","yellow","yellow","blue",
"blue","blue","orange","orange","orange")
Q_stimuli$features_shape <- c("circle","oval","square","circle",
"oval","square","circle","oval","square")
Q_stimuli$stimuli_num <- c(1:9)
finalList <- list(Q_stimuli,Q_stimuli,Q_stimuli,Q_stimuli,Q_stimuli,Q_stimuli)
finalList[[1]]$Block <- c(1)
finalList[[2]]$Block <- c(2)
finalList[[3]]$Block <- c(3)
finalList[[4]]$Block <- c(4)
finalList[[5]]$Block <- c(5)
finalList[[6]]$Block <- c(6)
return(finalList)
}
values_data = create_val_df()
displayed_stim <- c("2,6,7","2,4,9","3,5,7","1,5,9","3,4,8","2,4,9","2,6,7","3,5,7","1,5,9","2,4,9","1,5,9","3,5,7","3,4,8"
"2,6,7","1,6,8","3,4,8","1,6,8","1,6,8","3,4,8","1,5,9","3,5,7","1,6,8","3,5,7","1,5,9","1,5,9","3,4,8"
"2,4,9","2,4,9","1,6,8","1,6,8","2,6,7","3,5,7","3,4,8","2,6,7","2,6,7","2,4,9","1,5,9","1,5,9","3,4,8"
"2,6,7","3,5,7","1,6,8","3,5,7","1,6,8","1,6,8","2,6,7","3,4,8","2,6,7","3,4,8","2,4,9","3,5,7","2,4,9"
"1,5,9","2,4,9","1,6,8","3,4,8","2,6,7","1,6,8","2,4,9","1,5,9","1,6,8","2,4,9","3,4,8","2,6,7","2,4,9"
"2,6,7","3,5,7","1,5,9","1,5,9","3,5,7","3,5,7","3,4,8","2,4,9","2,6,7","2,6,7","1,5,9","1,5,9","3,5,7"
"1,6,8","2,6,7","1,6,8","1,6,8","3,5,7","3,4,8","3,5,7","3,4,8","2,4,9","2,4,9","1,5,9","3,4,8","3,5,7"
"2,4,9","2,4,9","3,5,7","2,4,9","1,5,9","3,4,8","3,5,7","1,5,9","3,4,8","2,6,7","1,6,8","1,6,8","2,6,7"
"2,6,7","3,4,8","1,5,9","1,6,8")
a <- c(3,1,2,2,3,1,2,2,1,1,1,1,1,2,2,1,2,2,1,1,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,2,1,1,3,1,3,3,3,3,2,3,2,2,3,2
1,2,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,3,3,1,2,1,2,3,2,2,3,1,3,3,1
1,3,2,3)
r <- c(0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1
1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,0,0,1,1
1,0,1,1)
tf <- c("square","square","square","square","square","square","square","square","square","square","square"
"square","square","square","square","square","square","square","yellow","yellow","yellow","yellow"
"yellow","yellow","yellow","yellow","yellow","yellow","yellow","yellow","yellow","yellow","yellow"
"yellow","yellow","yellow","circle","circle","circle","circle","circle","circle","circle","circle"
"circle","circle","circle","circle","circle","circle","circle","circle","circle","circle","orange"
"orange","orange","orange","orange","orange","orange","orange","orange","orange","orange","orange"
"orange","orange","orange","orange","orange","orange","blue","blue","blue","blue","blue",
"blue","blue","blue","blue","blue","blue","blue","blue","blue","blue","blue",
"blue","blue","oval","oval","oval","oval","oval","oval","oval","oval","oval",
,"oval","oval","oval","oval","oval","oval","oval","oval","oval")
Block <- c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6
6,6,6,6)
rep_data <- data.frame(displayed_stim,a,r,tf,Block)
starting_alpha <- logit(runif(1,0,1))
starting_phi <- logit(runif(1,0.50,1))
optim_output <- optimx::optimr(c(starting_alpha,starting_phi),fn=new_loglikelihood_ACL,method="L-BFGS-B",lower=c(-Inf,0), upper=c(Inf,Inf),val=values_data,rep=rep_data,control=list(ndeps=c(1e-5,2),maxit=10000))
`