I am trying to create a function that takes a set of observed and expected data points, determines the optimum function for calibration and applies this calibration on the whole data set (from which the data points are a subset). However, I want to ensure that the polynomial function that is fitted by either scipy.optimize.curve_fit
or numpy.polyfit
is monotonic (first derivative doesn't change sign).
The current test code that I have for this is below:
#! /usr/bin/env python
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import numpy as np
import math
import sys
import random
random.seed(28)
#############
# Test Data #
#############
measuredMaximaMZ = [100.0, 201.0, 222.0, 401.0, 500.0]
presentCalibrants = [100.5, 200.5, 300.5, 400.5, 500.5]
Fake_Raw_X = np.linspace(100,500,1000)
Fake_Raw_Y = random.sample(xrange(100000), 1000)
#############
# Functions #
#############
def powerLaw(x,a,b,c):
penalty = 0
if b > 2.:
penalty = abs(b-1.)*10000
if b < 0.:
penalty = abs(2.-b)*10000
return a*x**b + c + penalty
class powerLawCall:
def __init__(self,x,a,b,c):
self.a = a
self.b = b
self.c = c
self.f = lambda x: a*x**b+c
def __call__(self,x):
return self.f(x)
def describe(self):
return str(self.a)+"*X^"+str(self.b)+"+"+str(self.c)
def performCalibration(measured, expected):
RMS = sys.maxint
func = None
# Power Law
z = curve_fit(powerLaw, measured, expected)
RMS_buffer = []
for index, i in enumerate(measured):
RMS_buffer.append((powerLaw(i,*z[0])-expected[index])**2)
RMS_buffer = np.mean(RMS_buffer)
RMS_buffer = math.sqrt(RMS_buffer)
if RMS_buffer < RMS:
RMS = RMS_buffer
func = powerLawCall(0,*z[0])
# Polynomials between 1 and len(expected)
for i in range(1,len(expected)):
RMS_buffer = []
z = np.polyfit(measured, expected, i)
f = np.poly1d(z)
for index, j in enumerate(measured):
RMS_buffer.append((f(j) - expected[index])**2)
RMS_buffer = np.mean(RMS_buffer)
RMS_buffer = math.sqrt(RMS_buffer)
if RMS_buffer < RMS:
RMS = RMS_buffer
func = f
return func
#############
# Test Main #
#############
f = performCalibration(measuredMaximaMZ, presentCalibrants)
if isinstance(f,powerLawCall):
label = f.describe()
else:
label = ""
for index,i in enumerate(f):
if index < len(f):
label += "{0:.2e}".format(i)+"x^"+str(len(f)-index)+" + "
else:
label += "{0:.2e}".format(i)
# Write results
with open("UPC Results.txt",'w') as fw:
fw.write("Function: "+str(label)+"\n")
fw.write("Expected\tOriginal\tCalibrated\n")
for index, i in enumerate(measuredMaximaMZ):
fw.write(str(presentCalibrants[index])+"\t"+str(i)+"\t"+str(f(i))+"\n")
# Plotting
X_data = []
Y_data = []
for index, i in enumerate(measuredMaximaMZ):
X_data.append(presentCalibrants[index])
Y_data.append(f(i))
newX = np.linspace(X_data[0],X_data[-1],1000)
newY = f(newX)
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(211)
plt.scatter(X_data,measuredMaximaMZ,c='b',label='Raw',marker='s',alpha=0.5)
plt.scatter(X_data,Y_data,c='r',label='Calibrated',marker='s',alpha=0.5)
plt.plot(newX,newY,label="Fit, Function: "+str(label))
plt.legend(loc='best')
plt.title("UPC Test")
plt.xlabel("Expected X")
plt.ylabel("Observed X")
ax = fig.add_subplot(212)
plt.plot(Fake_Raw_X,Fake_Raw_Y,c='b')
plt.plot(f(Fake_Raw_X),Fake_Raw_Y,c='r')
plt.xlabel("X")
plt.ylabel("Y")
plt.show()
Which yields the following non-monotonic calibration curve (and data points) for the listed test-data (first plot in the picture):
The problem arises around the 300-450 in the X region post-calibration as can be seen from the second plot in this picture (Red is post-calibration, which has multiple Y-values for the X-values in that range):
Update
I have since uncovered how to specify bounds for the function using the bounds
part of curve_fit as per this question. The question then specifically is if there is a way to restrain curve_fit
or polyfit
to use only monotonic (polynomial) functions (the derivative of the function that it returns has the same sign throughout the specified region).