How do i count all combinations of k numbers from 1-n whose sum is equal to n? Like for n = 10, k = 3, we have (1, 2, 7), (1, 3, 6), (1, 4, 5), (2, 3, 5)
I've tried using itertools.combination but it grows really fast for big numbers
How do i count all combinations of k numbers from 1-n whose sum is equal to n? Like for n = 10, k = 3, we have (1, 2, 7), (1, 3, 6), (1, 4, 5), (2, 3, 5)
I've tried using itertools.combination but it grows really fast for big numbers
The smallest number we can make with k distinct positive integers is choose(k+1, 2).
Let r(n,k) = n - choose(k+1, 2).
Then the count of ways of forming n from k distinct integers is equal to the count of ways of summing k nonnegative non-necessarily-distinct integers to get r(n,k). The idea is we start with 1, 2, 3, ..., k, and then allocate r(n,k) to these starting integers in a nondecreasing manner.
E.g., 10, 3:
1 + 2 + 3 = choose(4,2) = 6, so r(10,3) = 10-6 = 4.
4 = 0+0+4, 0+1+3, 0+2+2, 1+1+2
(1,2,3) + (0,0,4) = (1,2,7)
(1,2,3) + (0,1,3) = (1,3,6)
(1,2,3) + (0,2,2) = (1,4,5)
(1,2,3) + (1,1,2) = (2,3,5)
So we've reduce the problem to counting the number of ways of summing k nonnegative integers to get r(n,k). Answered here
Ruby code (including util functions):
def choose(n, k)
return 0 if k > n
result = 1
1.upto(k) do |d|
result *= n
result /= d
n -= 1
end
return result
end
def count_partitions_nozeroes(n, k, cache = {})
return 1 if k==0 && n==0
return 0 if n <= 0 || k <= 0
# Check if the result is already memoized
if cache.key?([n, k])
return cache[[n, k]]
end
# Calculate the result
result = count_partitions_nozeroes(n-1, k-1, cache) + count_partitions_nozeroes(n-k, k, cache)
# Memoize the result for future use
cache[[n, k]] = result
return result
end
def count_partitions_zeros(n,k)
return count_partitions_nozeroes(n+k, k)
end
def solve(n,k)
r = n - choose(k+1,2)
return count_partitions_zeros(r,k)
end
Sample results
> solve(10,3)
=> 4
> solve(200,10)
=> 98762607
> solve(2000,10)
=> 343161146717017732
> solve(2000,100) # correct that there's no solution
=> 0
> solve(2000,40)
=> 2470516759655914864269838818691
> solve(5000,50)
=> 961911722856534054414857561149346788190620561928079
> solve(9000,100)
=> 74438274524772625088229884845232647085568457172246625852148213
Here's a simpler Ruby version that avoids recursion (other methods unchanged). It gives the same results as above. A few results for larger numbers shown below. This version is O(n*r).
def count_partitions_nozeroes(n, k)
n_to_k_to_count = Hash.new{|h, n| h[n] = Hash.new{|h2, k| h2[k] = 0}}
n_to_k_to_count[n][k] = 1
(n).downto(1) do |cur_n|
n_to_k_to_count.delete(cur_n + 1) # delete old keys to save space
n_to_k_to_count[cur_n].keys.each do |cur_k|
n_to_k_to_count[cur_n - 1][cur_k - 1] += n_to_k_to_count[cur_n][cur_k] if cur_n >= 1 && cur_k >= 1
n_to_k_to_count[cur_n - cur_k][cur_k] += n_to_k_to_count[cur_n][cur_k] if cur_n >= cur_k && cur_k >= 0
end
end
return n_to_k_to_count[0][0]
end
Sample results
> solve(10_000, 100)
=> 274235043379646744332574760930015102932669961381003514201948469288939
> solve(20_000, 100)
=> 7299696028160228272878582999080106323327610318395689691894033570930310212378988634117070675146218304092757
> solve(30_000, 100)
=> 272832080760303721646457320315409638838332197621252917061852201523368622283328266190355855228845140740972789576932357443034296
> solve(40_000, 200)
=> 1207940070190155086319681977786735094825631330761751426889808559216057614938892266960158470822904722575922933920904751545295375665942760497367
> solve(100_000, 200)
=> 13051215883535384859396062192804954511590479767894013629996324213956689010966899432038449004533035681835942448619230013858515264041486939129111486281204426757510182253404556858519289275662797170197384965998425620735381780708992863774464769
> solve(1_000_000, 200) # getting painfully slow; 3.5 mins
=> 42888085617859871072014862493356049406160707924757355757377806772267059145453158292921778894240787681100326388859698107659554647376742676484705287095709871992089520633323366183055674466048100639306064833776787643422680599710237129079050538847275806415974795879584513402381125673297339438303953873226899382823803432464875135708283442981500695089121425622135472568284901515995857775659213466818843464541496090119445962587194304280691087464026800781
A recursive approach with caching can produce results in a reasonable time:
from functools import lru_cache
@lru_cache(None)
def countNK(n,k,t=None):
t = n if t is None else t # track target partial sum
if k == 0: return int(t==0) # empty set can sum to zero
if t < 1 : return 0 # valid target only
if k > n : return 0 # not enough values
return countNK(n-1,k,t)+countNK(n-1,k-1,t-n) # combine counts
n
...
output:
print(countNK(10,3)) # 4
print(countNK(200,10)) # 98762607
If you need to process large values of n
(e.g. 500+), you'll either need to increase the recursion limit or convert the function to an iterative loop
Benchmark with n=100 and all k from 0 to 100, Kelly*
are my solutions:
2.5 ± 0.1 ms Kelly
2.8 ± 0.2 ms Kelly2
3.5 ± 0.2 ms Dave_translated_by_Kelly
295.0 ± 23.7 ms Alain
Let c(n, k) be the number of combinations with sum n with k different numbers 1 or larger.
We get: c(n, k) = c(n-k, k) + c(n-k, k-1)
You want sum n with k different numbers 1 or larger. You either use the number 1 in the combination or you don't.
The faster solutions with Dave's case n=9000, k=100:
469.1 ± 9.2 ms Kelly2
478.8 ± 17.0 ms Kelly
673.4 ± 18.8 ms Dave_translated_by_Kelly
Code (Attempt This Online!):
def Kelly(n, k):
if k == 0:
return 1 if n == 0 else 0
@cache
def c(n, k):
if n < k * (k+1) // 2:
return 0
if k == 1:
return 1
n -= k
return c(n, k) + c(n, k-1)
return c(n, k)
# Precompute the bounds for the "n < ..." base case
def Kelly2(n, k):
if k == 0:
return 1 if n == 0 else 0
min_n_for_k = list(accumulate(range(k+1)))
@cache
def c(n, k):
if n < min_n_for_k[k]:
return 0
if k == 1:
return 1
n -= k
return c(n, k) + c(n, k-1)
return c(n, k)
def Alain(n, k):
@lru_cache(None)
def countNK(n,k,t=None):
t = n if t is None else t # track target partial sum
if k == 0: return int(t==0) # empty set can sum to zero
if t < 1 : return 0 # valid target only
if k > n : return 0 # not enough values
return countNK(n-1,k,t)+countNK(n-1,k-1,t-n) # combine counts
return countNK(n, k)
def Dave_translated_by_Kelly(n, k):
def choose(n, k):
if k > n: return 0
result = 1
for d in range(1, k+1):
result *= n
result //= d
n -= 1
return result
def count_partitions_nozeroes(n, k, cache = {}):
if k==0 and n==0: return 1
if n <= 0 or k <= 0: return 0
# Check if the result is already memoized
if (n, k) in cache:
return cache[n, k]
# Calculate the result
result = count_partitions_nozeroes(n-1, k-1, cache) + count_partitions_nozeroes(n-k, k, cache)
# Memoize the result for future use
cache[n, k] = result
return result
def count_partitions_zeros(n,k):
return count_partitions_nozeroes(n+k, k)
def solve(n,k):
r = n - choose(k+1,2)
return count_partitions_zeros(r,k)
return solve(n, k)
big = False
funcs = Alain, Kelly, Kelly2, Dave_translated_by_Kelly
if big:
funcs = funcs[1:]
from functools import lru_cache, cache
from itertools import accumulate
from time import perf_counter as time
from statistics import mean, stdev
import sys
import gc
# Correctness
for n in range(51):
for k in range(51):
expect = funcs[0](n, k)
for f in funcs[1:]:
result = f(n, k)
assert result == expect
# Speed
sys.setrecursionlimit(20000)
times = {f: [] for f in funcs}
def stats(f):
ts = [t * 1e3 for t in sorted(times[f])[:5]]
return f'{mean(ts):5.1f} ± {stdev(ts):4.1f} ms '
for _ in range(25):
for f in funcs:
gc.collect()
t0 = time()
if big:
f(9000, 100)
else:
for k in range(101):
f(100, k)
times[f].append(time() - t0)
for f in sorted(funcs, key=stats):
print(stats(f), f.__name__)
Let's introduce a function: f(n,k,s)
=number of combinations of k
numbers from 1 to n
, having s
as their sum.
To solve the task we need to calculate f(n,k,n)
.
The function may be calculated recursively. All combinations can be split into two groups: with and without the max number. That gives us f(n,k,s)=f(n-1,k-1,s-n)+f(n-1,k,s)
. The recursion may stop in the following cases:
There are N^2*k
possible combinations of arguments, so if we cache the already calculated values, we will be within O(N^3)
.