Linear programming models.
Variant 1.
Sum(Uij * Qj) - Sum(Dij * Xj) + 0 = 0 (for each i)
0 + Sum(Dij * Xj) - Score >= 0 (for each i)
Sum(Qj) = (Number of questions - 30)
Maximize(Score)
Uij
is 1
if user i
has not seen question j
, otherwise it is 0
Dij
is element of identity matrix (Dij=1
if i=j
, otherwise it is 0
)
Xj
is auxiliary variable (one for each user)
Variant 2.
Sum(Uij * Qj) >= Score (for each i)
Sum(Qj) = (Number of questions - 30)
No objective function, just check feasibility
In this case, LP problem is simpler, but Score
should be determined by binary and linear search. Set current range to [0 .. the least number of unseen questions for a user], set Score
to the middle of the range, apply integer LP algorithm (with small time limit). If no solution found, set range to [begin .. Score
], otherwise set it to [Score
.. end] and continue binary search.
(Optionally) use binary search to determine upper bound for exact solution's Score
.
Starting from the best Score
, found by binary search, apply integer LP algorithm with Score
, increased by 1, 2, ...
(and limiting computation time as necessary). At the end, you get either exact solution, or some good approximation.
Here is sample code in C for GNU GLPK (for variant 1):
#include <stdio.h>
#include <stdlib.h>
#include <glpk.h>
int main(void)
{
int ind[3000];
double val[3000];
int row;
int col;
glp_prob *lp;
// Parameters
int users = 120;
int questions = 10000;
int questions2 = questions - 30;
int time = 30; // sec.
// Create GLPK problem
lp = glp_create_prob();
glp_set_prob_name(lp, "questions");
glp_set_obj_dir(lp, GLP_MAX);
// Configure rows
glp_add_rows(lp, users*2 + 1);
for (row = 1; row <= users; ++row)
{
glp_set_row_bnds(lp, row, GLP_FX, 0.0, 0.0);
glp_set_row_bnds(lp, row + users, GLP_LO, 0.0, 0.0);
}
glp_set_row_bnds(lp, users*2 + 1, GLP_FX, questions2, questions2);
// Configure columns
glp_add_cols(lp, questions + users + 1);
for (col = 1; col <= questions; ++col)
{
glp_set_obj_coef(lp, col, 0.0);
glp_set_col_kind(lp, col, GLP_BV);
}
for (col = 1; col <= users; ++col)
{
glp_set_obj_coef(lp, questions + col, 0.0);
glp_set_col_kind(lp, questions + col, GLP_IV);
glp_set_col_bnds(lp, questions + col, GLP_FR, 0.0, 0.0);
}
glp_set_obj_coef(lp, questions+users+1, 1.0);
glp_set_col_kind(lp, questions+users+1, GLP_IV);
glp_set_col_bnds(lp, questions+users+1, GLP_FR, 0.0, 0.0);
// Configure matrix (question columns)
for(col = 1; col <= questions; ++col)
{
for (row = 1; row <= users*2; ++row)
{
ind[row] = row;
val[row] = ((row <= users) && (rand() % 2))? 1.0: 0.0;
}
ind[users*2 + 1] = users*2 + 1;
val[users*2 + 1] = 1.0;
glp_set_mat_col(lp, col, users*2 + 1, ind, val);
}
// Configure matrix (user columns)
for(col = 1; col <= users; ++col)
{
for (row = 1; row <= users*2; ++row)
{
ind[row] = row;
val[row] = (row == col)? -1.0: ((row == col + users)? 1.0: 0.0);
}
ind[users*2 + 1] = users*2 + 1;
val[users*2 + 1] = 0.0;
glp_set_mat_col(lp, questions + col, users*2 + 1, ind, val);
}
// Configure matrix (score column)
for (row = 1; row <= users*2; ++row)
{
ind[row] = row;
val[row] = (row > users)? -1.0: 0.0;
}
ind[users*2 + 1] = users*2 + 1;
val[users*2 + 1] = 0.0;
glp_set_mat_col(lp, questions + users + 1, users*2 + 1, ind, val);
// Solve integer GLPK problem
glp_iocp param;
glp_init_iocp(¶m);
param.presolve = GLP_ON;
param.tm_lim = time * 1000;
glp_intopt(lp, ¶m);
printf("Score = %g\n", glp_mip_obj_val(lp));
glp_delete_prob(lp);
return 0;
}
Time limit is not working reliably in my tests. Looks like some bug in GLPK...
Sample code for variant 2 (only LP algorithm, no automatic search for Score
):
#include <stdio.h>
#include <stdlib.h>
#include <glpk.h>
int main(void)
{
int ind[3000];
double val[3000];
int row;
int col;
glp_prob *lp;
// Parameters
int users = 120;
int questions = 10000;
int questions2 = questions - 30;
double score = 4869.0 + 7;
// Create GLPK problem
lp = glp_create_prob();
glp_set_prob_name(lp, "questions");
glp_set_obj_dir(lp, GLP_MAX);
// Configure rows
glp_add_rows(lp, users + 1);
for (row = 1; row <= users; ++row)
{
glp_set_row_bnds(lp, row, GLP_LO, score, score);
}
glp_set_row_bnds(lp, users + 1, GLP_FX, questions2, questions2);
// Configure columns
glp_add_cols(lp, questions);
for (col = 1; col <= questions; ++col)
{
glp_set_obj_coef(lp, col, 0.0);
glp_set_col_kind(lp, col, GLP_BV);
}
// Configure matrix (question columns)
for(col = 1; col <= questions; ++col)
{
for (row = 1; row <= users; ++row)
{
ind[row] = row;
val[row] = (rand() % 2)? 1.0: 0.0;
}
ind[users + 1] = users + 1;
val[users + 1] = 1.0;
glp_set_mat_col(lp, col, users + 1, ind, val);
}
// Solve integer GLPK problem
glp_iocp param;
glp_init_iocp(¶m);
param.presolve = GLP_ON;
glp_intopt(lp, ¶m);
glp_delete_prob(lp);
return 0;
}
It appears that variant 2 allows to find pretty good approximation quite fast.
And approximation is better than for variant 1.