I found this explanation [link missing: "enter link description here"]. I need to compare my sample against each of the candidate distributions being fitted, using the Kolmogorov-Smirnov test, but I do not know how to interpret the results and choose the best distribution based on this test. The code below does not implement the Kolmogorov-Smirnov test. So: 1 - How do I implement the Kolmogorov-Smirnov test? 2 - How do I choose the best distribution?
def best_fit_distribution(data, bins=200, ax=None):
    """Find the candidate distribution whose fitted PDF best matches ``data``.

    A density histogram of ``data`` is built, every candidate distribution is
    fitted to ``data`` with its ``fit`` method, and the fitted PDF is evaluated
    at the histogram bin centres. Candidates are ranked by the sum of squared
    errors (SSE) between the histogram densities and the fitted PDF; the
    smallest strictly positive SSE wins.

    Args:
        data: Sample to model; passed to ``np.histogram`` and to each
            candidate's ``fit`` method.
        bins: Bin specification forwarded to ``np.histogram``.
        ax: Optional plotting axes. When given, each fitted PDF is drawn on
            it. Plotting is best effort and never affects the result.

    Returns:
        A ``(name, params)`` tuple: the name of the best candidate and its
        fitted parameters (shape parameters first, then loc, then scale).
        Falls back to ``("norm", (0.0, 1.0))`` when no candidate yields a
        positive SSE smaller than infinity.
    """
    # Density histogram of the data, evaluated at the bin centres.
    density, edges = np.histogram(data, bins=bins, density=True)
    centers = (edges[:-1] + edges[1:]) / 2.0

    # Distributions to check
    candidates = [st.alpha, st.anglit]

    # Best holders (defaults are returned when nothing can be fitted)
    best_distribution = st.norm
    best_params = (0.0, 1.0)
    best_sse = np.inf
    runs = []

    for distribution in candidates:
        # Fitting routinely emits numerical warnings for poorly matching
        # candidates; they are noise for this search.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')

            try:
                params = distribution.fit(data)
                # The last two fitted values are loc and scale; anything
                # before them is a shape parameter.
                *shape_args, loc, scale = params
                pdf = distribution.pdf(
                    centers, *shape_args, loc=loc, scale=scale
                )
                sse = np.sum(np.power(density - pdf, 2.0))
            except Exception:
                # Deliberate best effort: a candidate that cannot be fitted
                # to this data is skipped rather than aborting the search.
                continue

            if ax is not None:
                try:
                    pd.Series(pdf, centers).plot(ax=ax)
                except Exception:
                    # Plotting is decoration only; a failure here must not
                    # change which distribution is selected.
                    pass

            runs.append([distribution.name, sse])

            # A NaN SSE fails both comparisons and is therefore never chosen.
            if 0 < sse < best_sse:
                best_distribution = distribution
                best_params = params
                best_sse = sse

    print(runs)
    return (best_distribution.name, best_params)