Results
Below are functions that tackle the problem, firstly though here are the results
find_grouping(orders, 300L)
# orders group
# [1,] 100 1
# [2,] 198 1
# [3,] 50 2
# [4,] 40 2
# [5,] 215 2
# [6,] 296 3
allocate_groups(orders, 300L, 3L) # third argument <-> max. num. of groups
# orders group
# [1,] 100 3
# [2,] 198 3
# [3,] 50 2
# [4,] 40 2
# [5,] 215 2
# [6,] 296 1
# bigger vector
set.seed(123)
orders <- sample(1:300, 15)
find_grouping(orders, 300L)
# orders group
# [1,] 87 2
# [2,] 236 2
# [3,] 122 3
# [4,] 263 4
# [5,] 279 5
# [6,] 14 9
# [7,] 156 6
# [8,] 262 7
# [9,] 162 8
# [10,] 133 8
# [11,] 278 9
# [12,] 132 10
# [13,] 196 1
# [14,] 165 10
# [15,] 30 7
allocate_groups(orders, 300L, 3L)
# orders group
# [1,] 87 1
# [2,] 236 2
# [3,] 122 3
# [4,] 263 3
# [5,] 279 1
# [6,] 14 2
# [7,] 156 3
# [8,] 262 3
# [9,] 162 2
# [10,] 133 1
# [11,] 278 2
# [12,] 132 2
# [13,] 196 1
# [14,] 165 1
# [15,] 30 3
with the data orders = c(100L, 198L, 50L, 40L, 215L, 296L)
.
Edit: New Function
Considering the added constraint of wanting to specify the number of groups, here comes a new function
create_groups <- function (orders, num, group_num) {
orders
groups <- rep(list(NA_integer_), group_num)
for (k in sort(orders, decreasing = TRUE)) {
sums <- vapply(1:group_num, function (s) as.integer(sum(groups[[s]], na.rm = TRUE)), integer(1))
index <- ifelse(any(sums + k <= num), which(sums + k <= num)[which.min(abs(sums[which(sums + k <= num)]+k - num))], NA_integer_)
index <- ifelse(is.na(index), which.min(sums), index)
groups[[index]] <- append(groups[[index]],k)
groups[[index]] <- groups[[index]][!is.na(groups[[index]])]
}
groups
}
allocate_groups <- function (orders, num, group_num) {
groups <- create_groups(orders, num, group_num)
g <- rep(seq_along(groups), sapply(groups, length))
out <- cbind(orders, group = g[match(orders, unlist(groups))])
out
}
# results above
The added constraint actually makes the problem somewhat simpler: There are (at most) n
drawers that we want to fill with orders
and any total sum should be as close to num
as possible.
The Function
Here is the full code of the function
find_grouping <- function (orders, num) {
combs2 <- RcppAlgos::comboGeneral(orders, 2L, constraintFun = 'sum')
combs2 <- cbind.data.frame(combs2,close=abs(num - combs2[,3]))
out <- integer(length(orders))
skip <- NA_integer_
group <- 1L
for (k in seq_along(out)) {
val1 <- orders[k]
if (val1 %in% skip) next
ind1 <- (.subset2(combs2,1L) == val1) | (.subset2(combs2,2L) == val1)
ind2 <- (which.min(.subset2(combs2, 4L)[ind1]))
ind3 <- which(ind1)[ind2]
val2 <- .subset2(combs2, 3L)[ind3]
if (abs(num-val1) <= abs(num-val2)) {
out[k] <- group
group <- group + 1L
next
}
intList <- as.integer(combs2[ind3,1:2])
ordersRemain <- setdiff(orders, intList)
if (abs(num-val2) <= abs(num-val2-min(ordersRemain))) {
skip <- c(skip, intList)
out[orders %in% intList] <- group
group <- group + 1
next
}
val3 <- val2
cond <- FALSE
while (!cond) {
toAdd <- which.min(abs(num - (val2 + ordersRemain)))
val3 <- val3 + ordersRemain[toAdd]
intList <- c(intList, ordersRemain[toAdd])
ordersRemain <- ordersRemain[-toAdd]
cond <- abs(num-val3) <= abs(num-val2-min(ordersRemain))
}
skip <- c(skip, intList)
out[orders %in% intList] <- group
group <- group + 1
}
cbind(orders,group=out)
}
Explanation
The first step was to generate all combinations (of 2) of orders
using RcppAlgos::comboGeneral
(it is a rather fast method)
# num
combs <- RcppAlgos::comboGeneral(orders, 2L, constraintFun = 'sum')
combs <- cbind.data.frame(combs,close=abs(num - combs[,3])) # check how far from num are the combinations
# 1 2 3 close
# 1 100 198 298 2
# 2 100 50 150 150
# 3 100 40 140 160
# 4 100 215 315 15
# ...
From here on now there are several approaches. I opted for a loop where in each iteration I find the best combinations (i.e. closest to num
) for the current value orders[k]
, remember then given combination (e.g. 100;198
) and assign the combination a group
value.