I worked the version of the algorithm referenced by glinka in a previous comment, which is an improvement of the penalized weighted linear squares method published in a relatively recent paper. I took Rustam Guliev's code to build this one:
from scipy import sparse
from scipy.sparse import linalg
import numpy as np
from numpy.linalg import norm
def baseline_arPLS(y, ratio=1e-6, lam=100, niter=10, full_output=False):
L = len(y)
diag = np.ones(L - 2)
D = sparse.spdiags([diag, -2*diag, diag], [0, -1, -2], L, L - 2)
H = lam * D.dot(D.T) # The transposes are flipped w.r.t the Algorithm on pg. 252
w = np.ones(L)
W = sparse.spdiags(w, 0, L, L)
crit = 1
count = 0
while crit > ratio:
z = linalg.spsolve(W + H, W * y)
d = y - z
dn = d[d < 0]
m = np.mean(dn)
s = np.std(dn)
w_new = 1 / (1 + np.exp(2 * (d - (2*s - m))/s))
crit = norm(w_new - w) / norm(w)
w = w_new
W.setdiag(w) # Do not create a new matrix, just update diagonal values
count += 1
if count > niter:
print('Maximum number of iterations exceeded')
break
if full_output:
info = {'num_iter': count, 'stop_criterion': crit}
return z, d, info
else:
return z
In order to test the algorithm, I created a spectrum similar to the one shown in Fig. 3 of the paper, by first generating a simulated spectra consisting of multiple Gaussian peaks:
def spectra_model(x):
coeff = np.array([100, 200, 100])
mean = np.array([300, 750, 800])
stdv = np.array([15, 30, 15])
terms = []
for ind in range(len(coeff)):
term = coeff[ind] * np.exp(-((x - mean[ind]) / stdv[ind])**2)
terms.append(term)
spectra = sum(terms)
return spectra
x_vals = np.arange(1, 1001)
spectra_sim = spectra_model(x_vals)
Then, I created a third-order interpolating polynomial using 4 points taken directly from the paper:
from scipy.interpolate import CubicSpline
x_poly = np.array([0, 250, 700, 1000])
y_poly = np.array([200, 180, 230, 200])
poly = CubicSpline(x_poly, y_poly)
baseline = poly(x_vals)
noise = np.random.randn(len(x_vals)) * 0.1
spectra_base = spectra_sim + baseline + noise
Finally, I used the baseline correction algorithm to subtract the baseline out of the altered spectra (spectra_base
):
_, spectra_arPLS, info = baseline_arPLS(spectra_base, lam=1e4, niter=10,
full_output=True)
The results were (for reference, I compared with the pure ALS implementation by Rustam Guliev's, using lam = 1e4
and p = 0.001
):
