Hi everybody,
I have a function to optimize, subject to linear inequality constraints. I am actually using the maxLik R package, but since that is a wrapper around various methods, what is actually running underneath is constrOptim.
The problem is the following: my constraint matrix is n^2 x 2n (see the code below) with n ~ 10^3, so the matrix is huge and the routine stops with a memory error:
Cholmod error 'problem too large' at file ../Core/cholmod_dense.c, line 105
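A back-of-the-envelope count shows why the dense route is hopeless (assuming 8-byte doubles):
n = 1e3
n^2 * (2*n) * 8 / 2^30   # dense n^2 x 2n constraint matrix: ~15 GiB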
It seemed quite natural to shift to sparse matrices (mine is indeed very sparse) via the Matrix package, but then I always get the following error
Error: Matrices must have same dimensions in ineqA * gi.old
even for small n.
Does this mean that sparseMatrix objects are not supported in constrOptim?
Does anyone know a way out?
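My guess at the cause (only a sketch, I have not traced the maxLik internals): the barrier-gradient code evaluates something like ineqA * gi.old, which relies on base-matrix recycling of a plain numeric vector, whereas with Matrix classes gi.old comes out of ineqA %*% theta + ineqB as an n^2 x 1 Matrix, and elementwise * between two Matrix objects of different dimensions raises exactly this error. A toy illustration:
library(Matrix)
A = Diagonal(3)     # 3 x 3, stands in for ineqA
g = A %*% c(1,2,3)  # a 3 x 1 dgeMatrix, not a plain numeric vector
A * g               # Error: Matrices must have same dimensions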
Reproducible example
You can find the dataset I am using here: http://konect.uni-koblenz.de/downloads/extraction/opsahl.tar.bz2
and here is the code:
#read edgelist
edgelist <- read.table('out.opsahl-usairport',skip=2)
colnames(edgelist) = c('V1','V2','weight')
require(igraph)
require(Matrix) # provides Diagonal() and sparseMatrix(); was missing from the listing
require(maxLik)
g = graph_from_data_frame(edgelist)
s_in = strength(g, v=V(g), mode='in')
s_out = strength(g, v=V(g), mode='out')
n = length(s_in)
# optimization function
objective_fun = function(x){
theta_out = x[1:(length(x)/2)]; theta_in = x[(length(x)/2+1):length(x)];
llikelihood(s_out,s_in,theta_out,theta_in)
}
llikelihood = function(s_out,s_in,theta_out, theta_in){
theta_sum_mat = outer(theta_out,rep(1,length(theta_out))) + outer(rep(1,length(theta_in)),theta_in)
theta_sum_mat = log(1-exp(-theta_sum_mat))
diag(theta_sum_mat) = 0 # avoid self loops
f = -sum(s_out*theta_out+s_in*theta_in) + sum(theta_sum_mat)
f
}
#choose appropriate starting point
starting_point = function(s_out,s_in){
s_tot = sum(s_in) # =sum(s_out)
s_mean = mean(c(mean(s_in), mean(s_out))) # mean(mean(s_in), mean(s_out)) would pass mean(s_out) as the 'trim' argument and just return mean(s_in)
z = log((s_tot + s_mean^2)/(s_mean^2))
list(theta_out = rep(1,length(s_out)), theta_in=rep(z-1,length(s_in))) # starting parameters
}
#gradient
grad = function(x){
theta_out = x[1:(length(x)/2)]; theta_in = x[(length(x)/2+1):length(x)];
ret = grad_fun(s_out,s_in,theta_out,theta_in)
ret
}
grad_fun = function(s_out,s_in, theta_out, theta_in){
theta_sum_mat = outer(theta_out,rep(1,length(theta_out))) + outer(rep(1,length(theta_in)),theta_in)
theta_sum_mat = exp(-theta_sum_mat)/(1-exp(-theta_sum_mat))
diag(theta_sum_mat) = 0 # avoid self loops
c(-s_out + rowSums(theta_sum_mat), -s_in + colSums(theta_sum_mat))
}
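# optional sanity check of the analytic gradient (assumes the numDeriv package):
# compare against a numerical gradient on a truncated problem, since the
# objective is O(n^2) and a full-size numerical gradient would be very slow
# s_out_bak = s_out; s_in_bak = s_in
# s_out = s_out_bak[1:5]; s_in = s_in_bak[1:5]
# x_test = runif(10, min=0.5, max=1.5)
# max(abs(grad(x_test) - numDeriv::grad(objective_fun, x_test)))  # should be ~0
# s_out = s_out_bak; s_in = s_in_bak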
#constraints
constraints = function(n){
a1 = Diagonal(n); a2 = sparseMatrix(1:n, rep(1,n), x=1, dims=c(n,n)) # Diagonal(n) is a sparse identity matrix
a12 = cbind(a1,a2) # cBind/rBind are defunct in current Matrix; plain cbind/rbind dispatch to the sparse methods
a12[1,] = 0 # avoid self loops
dd = function(j){ # n x n sparse matrix whose j-th column is all ones
sparseMatrix(1:n, rep(j,n), x=rep(1,n), dims=c(n,n))
}
b1 = sparseMatrix(i=1, j=1, x=1, dims=c(n^2,1)) # n^2 x 1 sparse vector; the leading 1 covers block 1's zeroed self-loop row
for(j in 2:n) {
a = cbind(Diagonal(n),dd(j))
a[j,] = 0 # avoid self loops
a12 = rbind(a12, a)
b1[(j-1)*n+j] = 1 # set B = 1 on each zeroed self-loop row, so that ineqA %*% theta + ineqB > 0 holds trivially
}
return(list(A=a12, B=b1))
}
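# quick structural check on a toy size (n=3): A should be 9 x 6 (n^2 x 2n),
# B 9 x 1, with the self-loop rows 1, 5, 9 zeroed in A and set to 1 in B
# cc = constraints(3); dim(cc$A); as.matrix(cc$A); as.vector(cc$B)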
# starting point
theta_0 = starting_point(s_out,s_in)
x_0 = c(theta_0$theta_out, theta_0$theta_in)
#constraints (build them once; constraints(n) is expensive, no need to call it twice)
cons = constraints(n)
constr = list(ineqA=cons$A, ineqB=cons$B)
# optimization
co = maxControl(printLevel = 1, iterlim=500, tol=1e-4) #tol=1e-8 (def) iterlim=150 (def)
res = maxLik(objective_fun, grad=grad, start=x_0, constraints=constr, control=co)
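For what it's worth, the check I would run to confirm that the sparse classes themselves are the culprit is to coerce the very same constraints back to dense base classes (only feasible for small n, of course, since it recreates the memory problem):
constr_dense = list(ineqA = as.matrix(constr$ineqA), ineqB = as.vector(constr$ineqB))
res_dense = maxLik(objective_fun, grad=grad, start=x_0, constraints=constr_dense, control=co)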