This will be painful to run an optimization over unless it runs faster. From top to bottom it hasn't been vectorized, and that needs to happen. The "easy" pass, where ltest stands for your original input array, is:
from typing import Sequence
import numpy as np
from scipy.optimize import Bounds, minimize
v = 7
st = 2
exp_v = np.exp(-1/v)
def periodic(values: Sequence[float], i: int | Sequence[int]) -> float | Sequence[float]:
"""Periodic indexation"""
return values[i % len(values)]
def rt_inner(values: np.ndarray, j: int, k: int) -> float:
i = np.arange(-k, k + 1)
per = periodic(values, j + i)
quotient = (
exp_v**(
(k - np.abs(i)).dot(per)
) - exp_v**(
(k - np.abs(i) + 1).dot(per)
)
)/(
per.sum() + 1e-10
)
return quotient
def repeat_time(values: np.ndarray, j: int, L: int) -> float:
"""Main function (index j)"""
return sum(
rt_inner(values, j, k) for k in range(L + 1)
)
def function_points(values: np.ndarray) -> list[float]:
"""Vector function"""
return [
repeat_time(values, j, len(values) // st)
for j in range(len(values))
]
def fun(values: np.ndarray, ltest0: np.ndarray) -> float:
"""compute the difference between ltest and the list generated by repeat_time"""
inner = function_points(values) - ltest0
mse = inner.dot(inner)
return mse
def regression_test() -> None:
x0 = np.pi/4/v / ltest**2
assert np.allclose(x0[:10], [2.210536497254391e-05, 2.206056701657588e-05, 2.201689200243481e-05, 2.1974207592018127e-05, 2.1932628431138212e-05,
2.18921456967544e-05, 2.185275082723814e-05, 2.181437468386809e-05, 2.177707036172205e-05, 2.1740769518783152e-05])
assert np.isclose(
0.35447169592652333,
repeat_time(np.linspace(0.5, 1.5, 9), 0, 9 // st),
)
assert np.isclose(
0.37331496035895984,
repeat_time(np.linspace(0.5, 1.5, 9), 1, 9 // st),
)
assert np.allclose(
function_points(np.arange(20)),
[0.048562605686658246, 0.2561333409217431, 0.22284510468654223, 0.1801348324371491, 0.15240596740034554,
0.13317771870433162, 0.11878913224487342, 0.10747112835180678, 0.0982518235968824, 0.09054651477020922,
0.08397926925966721, 0.07829570944628872, 0.07331651598542453, 0.06891093040237876, 0.06498083918680733,
0.06145079756314917, 0.05826155047254675, 0.05536569520658036, 0.05272480791861977, 0.050932251718794584]
)
assert np.isclose(
140202.5913000041,
fun(x0, ltest),
)
def optimize():
x0 = np.pi/4/v / ltest**2
res = minimize(fun=fun, x0=x0, args=(ltest,), bounds=Bounds(0, np.inf))
# The optimized list is stored in res.x
opt_list = res.x
if __name__ == '__main__':
regression_test()
# optimize()
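If it isn't obvious why the two dot products are equivalent to a plain loop over i, here is a throwaway sanity check; rt_inner_loop and rt_inner_vec are names invented for this demonstration only, not part of the solution above:
import numpy as np

v = 7
exp_v = np.exp(-1/v)

def rt_inner_loop(values, j, k):
    """Literal scalar translation of the sum over i in -k .. k."""
    e0 = e1 = den = 0.0
    for i in range(-k, k + 1):
        val = values[(j + i) % len(values)]   # what periodic() does, one element at a time
        e0 += (k - abs(i)) * val              # exponent of the first exp_v term
        e1 += (k - abs(i) + 1) * val          # exponent of the second exp_v term
        den += val
    return (exp_v**e0 - exp_v**e1) / (den + 1e-10)

def rt_inner_vec(values, j, k):
    """The same quantity as one fancy-indexed gather plus two dot products."""
    i = np.arange(-k, k + 1)
    per = values[(j + i) % len(values)]
    return (exp_v**((k - np.abs(i)).dot(per))
            - exp_v**((k - np.abs(i) + 1).dot(per))) / (per.sum() + 1e-10)

values = np.linspace(0.5, 1.5, 9)
for k in range(5):
    assert np.isclose(rt_inner_loop(values, 1, k), rt_inner_vec(values, 1, k))
The gather values[(j + i) % len(values)] is exactly what periodic() does, and the two dot products accumulate the exponents that the scalar loop builds one term at a time.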
The "hard" part comes in because your inner operation is not square, it's triangular:
import numpy as np
from scipy.optimize import Bounds, minimize
v = 7
st = 2
exp_v = np.exp(-1/v)
L_structures = {}
def make_L_structures(L: int) -> None:
k = np.arange(L+1)[:, np.newaxis]
irhs = k - np.arange(L+1)[np.newaxis, :]
ilhs = k - np.arange(1, L+1)[np.newaxis, :]
L_structures['ki_tri'] = np.hstack((np.tril(ilhs, k=-1), np.tril(irhs)))
L_structures['ki_trip1'] = np.hstack((np.tril(ilhs+1, k=-1), np.tril(irhs+1)))
def rt_inner(values: np.ndarray, j: int, L: int) -> np.ndarray:
index_into = np.tile(values, 2)
val_tri_rhs = np.tril(
np.broadcast_to(index_into[np.newaxis, j:L+j+1], (L+1, L+1))
)
val_tri_lhs = np.tril(
np.broadcast_to(index_into[np.newaxis, -values.size+j-1: -values.size+j-1-L:-1], (L+1, L)),
k=-1,
)
val_tri = np.hstack((val_tri_lhs, val_tri_rhs))
product = (
exp_v**(
(L_structures['ki_tri'] * val_tri).sum(axis=1)
) - exp_v**(
(L_structures['ki_trip1'] * val_tri).sum(axis=1)
)
)/(
val_tri.sum(axis=1) + 1e-10
)
return product
def repeat_time(values: np.ndarray, j: int, L: int) -> float:
"""Main function (index j)"""
return rt_inner(values, j, L).sum()
def function_points(values: np.ndarray) -> list[float]:
"""Vector function"""
return [
repeat_time(values, j, len(values) // st)
for j in range(len(values))
]
def fun(values: np.ndarray, ltest0: np.ndarray) -> float:
"""compute the difference between ltest and the list generated by repeat_time"""
inner = function_points(values) - ltest0
mse = inner.dot(inner)
return mse
def regression_test() -> None:
x0 = np.pi/4/v / ltest**2
assert np.allclose(x0[:10], [2.210536497254391e-05, 2.206056701657588e-05, 2.201689200243481e-05, 2.1974207592018127e-05, 2.1932628431138212e-05,
2.18921456967544e-05, 2.185275082723814e-05, 2.181437468386809e-05, 2.177707036172205e-05, 2.1740769518783152e-05])
make_L_structures(L=9//st)
assert np.isclose(
0.35447169592652333,
repeat_time(np.linspace(0.5, 1.5, 9), 0, 9 // st),
)
assert np.isclose(
0.37331496035895984,
repeat_time(np.linspace(0.5, 1.5, 9), 1, 9 // st),
)
make_L_structures(L=20//st)
assert np.allclose(
function_points(np.arange(20)),
[0.048562605686658246, 0.2561333409217431, 0.22284510468654223, 0.1801348324371491, 0.15240596740034554,
0.13317771870433162, 0.11878913224487342, 0.10747112835180678, 0.0982518235968824, 0.09054651477020922,
0.08397926925966721, 0.07829570944628872, 0.07331651598542453, 0.06891093040237876, 0.06498083918680733,
0.06145079756314917, 0.05826155047254675, 0.05536569520658036, 0.05272480791861977, 0.050932251718794584]
)
make_L_structures(L=ltest.size//st)
assert np.isclose(
140202.5913000041,
fun(x0, ltest),
)
def optimize():
make_L_structures(L=ltest.size//st)
x0 = np.pi/4/v / ltest**2
res = minimize(fun=fun, x0=x0, args=(ltest,), bounds=Bounds(0, np.inf))
# The optimized list is stored in res.x
opt_list = res.x
if __name__ == '__main__':
regression_test()
# optimize()
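If the hstacked triangles in make_L_structures are hard to picture, this is what they look like for a toy L (illustration only, not part of the solution):
import numpy as np

L = 2
k = np.arange(L + 1)[:, np.newaxis]
irhs = k - np.arange(L + 1)[np.newaxis, :]       # k - i for i = 0 .. L
ilhs = k - np.arange(1, L + 1)[np.newaxis, :]    # k - |i| for i = -1 .. -L
ki_tri = np.hstack((np.tril(ilhs, k=-1), np.tril(irhs)))
print(ki_tri)
# [[0 0 0 0 0]
#  [0 0 1 0 0]
#  [1 0 2 1 0]]
# Row k holds the k - |i| coefficients; columns correspond to i = -1, -2, then
# i = 0, 1, 2, with zeros outside |i| <= k. ki_trip1 is built the same way from
# the "+ 1" coefficients, and val_tri gathers the matching values[j + i] into
# the same layout.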
(Much) more can be done to vectorize this, such as flattening everything out into "2.5-dimensional" data (the triangular rows packed flat inside a rectangular index layout and then summed by group) - though it needs more work, because the binning step is slow:
from time import monotonic
import cProfile
from typing import Any
import numpy as np
import scipy.optimize
class Problem:
__slots__ = (
'ltest', 'exp_v', 'v', 'L',
'value_idx', 'yj', 'ki',
)
def __init__(
self,
ltest: np.ndarray,
v: int = 7,
st: int = 2,
) -> None:
self.ltest = ltest
self.exp_v = np.exp(-1/v)
self.v = v
self.L = L = ltest.size // st
# Right and left triangular indices for expanding (-k, k) sum
yr, xr = np.tril_indices(n=L + 1, m=L + 1)
yl, xl = np.tril_indices(n=L + 1, m=L + 1, k=-1)
y = np.concatenate((yl, yr))
j = np.arange(ltest.size)[:, np.newaxis]
self.yj = (y[np.newaxis, :] + j*(L+1)).ravel()
# Right and left value array indices
vr = xr
vl = -1 - xl
# j-offset periodic indexing
value_idx = (np.concatenate((vl, vr)) + j) % ltest.size
# bincount does not natively work with n-dimensional data. One dimension needs to be collapsed.
self.value_idx = value_idx.ravel()
# Right and left k-abs(i) terms
kir = yr - xr
kil = kir[:-L - 1]
ki = np.concatenate((kil, kir))
self.ki = np.broadcast_to(ki, value_idx.shape).ravel()
def function_points(self, values: np.ndarray) -> np.ndarray:
values_used = values[self.value_idx]
sumsv = np.bincount(self.yj, weights=values_used)
p0, p1 = self.exp_v ** np.vstack((
np.bincount(self.yj, weights=self.ki * values_used),
np.bincount(self.yj, weights=(self.ki+1) * values_used),
))
product = (
(p0 - p1)/(sumsv + 1e-10)
).reshape((values.size, -1))
return product.sum(axis=1)
def mse(self, values: np.ndarray) -> float:
"""compute the difference between ltest and the list generated by function_points"""
inner = self.function_points(values) - self.ltest
return inner.dot(inner)
def x0(self) -> np.ndarray:
return np.pi/4/self.v / self.ltest**2
def optimize(self, **kwargs: Any) -> scipy.optimize.OptimizeResult:
return scipy.optimize.minimize(
fun=self.mse,
x0=self.x0(),
bounds=scipy.optimize.Bounds(0, np.inf),
options=kwargs,
)
def regression_test() -> None:
p = Problem(np.arange(20))
assert np.allclose(
p.function_points(np.arange(20)),
[0.048562605686658246, 0.2561333409217431, 0.22284510468654223, 0.1801348324371491, 0.15240596740034554,
0.13317771870433162, 0.11878913224487342, 0.10747112835180678, 0.0982518235968824, 0.09054651477020922,
0.08397926925966721, 0.07829570944628872, 0.07331651598542453, 0.06891093040237876, 0.06498083918680733,
0.06145079756314917, 0.05826155047254675, 0.05536569520658036, 0.05272480791861977, 0.050932251718794584]
)
p = Problem(ltest)
x0 = np.pi/4/p.v / ltest**2
assert np.allclose(x0[:10], [2.210536497254391e-05, 2.206056701657588e-05, 2.201689200243481e-05, 2.1974207592018127e-05, 2.1932628431138212e-05,
2.18921456967544e-05, 2.185275082723814e-05, 2.181437468386809e-05, 2.177707036172205e-05, 2.1740769518783152e-05])
t0 = monotonic()
assert np.isclose(140202.5913000041, p.mse(x0))
t1 = monotonic()
print(t1 - t0) # 6+ seconds
def profile() -> None:
"""
37 function calls (35 primitive calls) in 8.537 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.056 0.056 8.537 8.537 76637914.py:198(mse)
1 2.674 2.674 8.481 8.481 76637914.py:185(function_points)
6/4 5.806 0.968 5.806 1.452 {built-in method numpy.core._multiarray_umath.implement_array_function}
3 0.000 0.000 5.804 1.935 <__array_function__ internals>:177(bincount)
"""
p = Problem(ltest)
with cProfile.Profile() as pr:
p.mse(p.x0())
pr.print_stats(sort='cumtime')
def optimize() -> None:
prob = Problem(ltest)
result = prob.optimize(disp=True)
print(result)
if __name__ == '__main__':
# regression_test()
# profile()
optimize()
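The load-bearing primitive in function_points is np.bincount with weights, which is a grouped (segmented) sum: yj labels every element of the flattened triangular data with the row it belongs to, so a single bincount call sums all rows at once even though they have different lengths. Stripped of everything problem-specific, the trick is just:
import numpy as np

# np.bincount(groups, weights=w)[g] is the sum of w over the positions where
# groups == g: a grouped/segmented sum in a single call.
groups = np.array([0, 0, 1, 2, 2, 2])
weights = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
print(np.bincount(groups, weights=weights))   # [ 3.  3. 15.]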
Even this takes, at best, six seconds (!) per objective evaluation. A serious treatment of this problem will need to drop down to C and/or take advantage of a GPU.
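For what it's worth, one low-effort version of "drop down to C" is to hand the naive loops to Numba instead of vectorizing them. This is only a sketch, under the assumption that Numba is acceptable in your environment; it computes the same quantity as the very first snippet, just with explicit loops that njit compiles to machine code:
import numpy as np
import numba

V = 7
ST = 2
EXP_V = np.exp(-1/V)

@numba.njit
def function_points_nb(values):
    """Same quantity as function_points above, computed with explicit loops."""
    n = values.size
    L = n // ST
    out = np.empty(n)
    for j in range(n):
        total = 0.0
        for k in range(L + 1):
            e0 = 0.0   # exponent of the first exp_v term
            e1 = 0.0   # exponent of the second exp_v term
            den = 0.0  # periodic window sum
            for i in range(-k, k + 1):
                val = values[(j + i + n) % n]  # + n keeps the index non-negative
                e0 += (k - abs(i)) * val
                e1 += (k - abs(i) + 1) * val
                den += val
            total += (EXP_V**e0 - EXP_V**e1) / (den + 1e-10)
        out[j] = total
    return out

# e.g. function_points_nb(np.linspace(0.5, 1.5, 9))[:2] should reproduce the
# repeat_time regression values above (0.3544717..., 0.3733150...).
The GPU analogue would be porting the bincount-based Problem class to CuPy, assuming its bincount accepts the weights argument the same way NumPy's does.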