Here is the genetic algorithm I referenced modified to your case, for an explanation of the algorithm see my answer there. There may be (are certainly) ways to solve your issue with less code, but I had this solution on the shelf already and adapting it was simple. The input required is a data.frame
with a column value and a column weights, which can be all zero:
value weights
1 45 0
2 33 0
3 47 0
4 65 0
5 12 0
6 43 0
7 5 0
... ... ...
The algorithm will then find a set of weights from the set c(-1,0,1)
such that the value of
abs(target_value - sum(final_solution$value*final_solution$weights))
is minimized.
There is definitely still room for improvement, for example the weights are now set completely randomly, so the expected weighted sum of an initial solution is always 0. If the target_value is very high, it would be best to assign 1's a higher probability than -1, to converge to an optimal solution faster.
It seems to work very well for this case, with 100000
objects and a target value of 12000
, it finds an optimal solution within a fraction of a second:

Code:
### PARAMETERS -------------------------------------------
n_population = 100 # the number of solutions in a population
n_iterations = 100 # The number of iterations
n_offspring_per_iter = 80 # number of offspring to create per iteration
frac_perm_init = 0.25 # fraction of columns to change from default solution while creating initial solutions
early_stopping_rounds = 100 # Stop if score not improved for this amount of iterations
### SAMPLE DATA -------------------------------------------------
n_objects = 100000
datain =data.frame(value=round(runif(n_objects,0,100)),weights = 0))
target_value=12000
### ALL OUR PREDEFINED FUNCTIONS ----------------------------------
# Score a solution
# We calculate the score by taking the sum of the squares of our overcapacity (so we punish very large overcapacity on a day)
score_solution <- function(solution,target_value)
{
abs(target_value-sum(solution$value*solution$weights))
}
# Merge solutions
# Get approx. 50% of tasks from solution1, and the remaining tasks from solution 2.
merge_solutions <- function(solution1,solution2)
{
solution1$weights = ifelse(runif(nrow(solution1),0,1)>0.5,solution1$weights,solution2$weights)
return(solution1)
}
# Randomize solution
# Create an initial solution
randomize_solution <- function(solution)
{
solution$weights = sample(c(-1,0,1),nrow(solution),replace=T)
return(solution)
}
# sort population based on scores
sort_pop <- function(population)
{
return(population[order(sapply(population,function(x) {x[['score']]}),decreasing = F)])
}
# return the scores of a population
pop_scores <- function(population)
{
sapply(population,function(x) {x[['score']]})
}
### RUN SCRIPT -------------------------------
# starting score
print(paste0('Starting score: ',score_solution(datain,target_value)))
# Create initial population
population = vector('list',n_population)
for(i in 1:n_population)
{
# create initial solutions by making changes to the initial solution
solution = randomize_solution(datain)
score = score_solution(solution,target_value)
population[[i]] = list('solution' = solution,'score'= score)
}
population = sort_pop(population)
score_per_iteration <- score_solution(datain,target_value)
# Run the algorithm
for(i in 1:n_iterations)
{
print(paste0('\n---- Iteration',i,' -----\n'))
# create some random perturbations in the population
for(j in 1:10)
{
sol_to_change = sample(2:n_population,1)
new_solution <- randomize_solution(population[[sol_to_change]][['solution']])
new_score <- score_solution(new_solution,target_value)
population[[sol_to_change]] <- list('solution' = new_solution,'score'= new_score)
}
# Create offspring, first determine which solutions to combine
# determine the probability that a solution will be selected to create offspring (some smoothing)
probs = sapply(population,function(x) {x[['score']]})
if(max(probs)==min(probs)){stop('No diversity in population left')}
probs = 1-(probs-min(probs))/(max(probs)-min(probs))+0.2
# create combinations
solutions_to_combine = lapply(1:n_offspring_per_iter, function(y){
sample(seq(length(population)),2,prob = probs)})
for(j in 1:n_offspring_per_iter)
{
new_solution <- merge_solutions(population[[solutions_to_combine[[j]][1]]][['solution']],
population[[solutions_to_combine[[j]][2]]][['solution']])
new_score <- score_solution(new_solution,target_value)
population[[length(population)+1]] <- list('solution' = new_solution,'score'= new_score)
}
population = sort_pop(population)
population= population[1:n_population]
print(paste0('Best score:',population[[1]]['score']))
score_per_iteration = c(score_per_iteration,population[[1]]['score'])
if(i>early_stopping_rounds+1)
{
if(score_per_iteration[[i]] == score_per_iteration[[i-10]])
{
stop(paste0("Score not improved in the past ",early_stopping_rounds," rounds. Halting algorithm."))
}
}
}
plot(x=seq(0,length(score_per_iteration)-1),y=score_per_iteration,xlab = 'iteration',ylab='score')
final_solution = population[[1]][['solution']]