In scikit-learn, fitting a Gaussian peak using GMM seems to work with discrete data points. Is there a way of using GMM with data which has already been binned, or aggregated into a histogram?
For example, the following code is a work-around which converts the binned data into discrete data points before fitting:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture
def fit_one_peak(x, linspace):
    """Fit a one-component mixture model to ``x`` and evaluate it on ``linspace``.

    Args:
        x: Samples passed unchanged to ``mixture.GMM.fit``.
        linspace: Points passed unchanged to ``score_samples``.

    Returns:
        A 3-tuple ``(curve, mean, weight)`` where ``curve`` is ``np.exp`` of the
        first element of the ``score_samples`` result, ``mean`` is
        ``means_[0][0]`` and ``weight`` is ``weights_[0]`` of the fitted model.
    """
    # NOTE(review): ``mixture.GMM`` is the legacy scikit-learn estimator; newer
    # releases appear to provide ``GaussianMixture`` instead -- confirm against
    # the installed version before relying on this.
    model = mixture.GMM(n_components=1)
    model.fit(x)

    peak_mean = model.means_[0][0]
    peak_weight = model.weights_[0]

    # Presumably element 0 of the result holds per-point log-probabilities,
    # hence the exponentiation -- verify against the estimator's documentation.
    log_curve = model.score_samples(linspace)[0]
    return np.exp(log_curve), peak_mean, peak_weight
def convert_to_signal(d, s):
    """Expand binned data into a flat list of repeated sample positions.

    Every element ``s[k]`` is emitted ``int(d[k])`` times, in the order the
    elements appear in ``s``.

    Args:
        d: Indexable sequence of per-bin counts, aligned with ``s``. Entries
            beyond ``len(s)`` are ignored.
        s: Iterable of bin positions.

    Returns:
        A list containing each position repeated according to its count.

    Raises:
        IndexError: If ``d`` has fewer entries than ``s``.
    """
    c = []
    for index, position in enumerate(s):
        # int() truncates toward zero, so a fractional count such as 0.5
        # contributes no samples; non-positive counts contribute none either.
        c.extend([position] * int(d[index]))
    return c
# Histogram that is already binned: ``d`` holds the y values and ``s`` the
# matching x values.
d = [0.5, 2, 5, 3, 1, 0.5]
s = [0, 1, 2, 3, 4, 5]

# Turn the bins back into individual data points before fitting.
signal = convert_to_signal(d, s)

# Evaluate the fit at len(s) evenly spaced points from s[0] to s[-1].
linspace = np.linspace(s[0], s[-1], num=len(s))
l, mean, weight = fit_one_peak(signal, linspace)

# Rescale the fitted curve so its maximum equals the largest value in ``d``.
l = l * (np.max(d) / l.max())

# Overlay the original bins, the fitted curve and a histogram of the
# expanded samples on one figure.
fig = plt.figure()
plt.plot(s, d, label='Original')
plt.plot(linspace, l, label='Fitted')
plt.hist(signal, label='Re-binned')
plt.legend()