I'm new to Python and have some questions about applying code that fits probability distributions to data.
I am trying to use this code:
import scipy
import scipy.stats
import matplotlib
import matplotlib.pyplot as plt
class Distribution(object):
    """Fit several candidate scipy.stats distributions and keep the best one.

    Each candidate is fitted to the data and scored with the
    Kolmogorov-Smirnov test; the candidate with the highest p-value wins.

    Typical use: create an instance, then call ``Fit`` on it with the data,
    e.g. ``dst = Distribution()`` followed by ``dst.Fit(y)``.

    Attributes:
        dist_names: names of the scipy.stats distributions that are tried.
        dist_results: list of ``(name, p_value)`` tuples from the last fit.
        params: maps each distribution name to its fitted parameters.
        DistributionName: name of the best-fitting distribution.
        PValue: p-value of the best-fitting distribution.
        Param: fitted parameters of the best-fitting distribution.
        isFitted: True once ``Fit`` has completed.
    """

    def __init__(self, dist_names_list=None):
        """Set up an unfitted instance.

        Args:
            dist_names_list: optional sequence of scipy.stats distribution
                names to try. When omitted or empty, 'norm', 'lognorm' and
                'expon' are used.
        """
        # None instead of a mutable [] default; copy so the caller's list
        # is never shared with this instance.
        if dist_names_list:
            self.dist_names = list(dist_names_list)
        else:
            self.dist_names = ['norm', 'lognorm', 'expon']
        self.dist_results = []
        self.params = {}
        self.DistributionName = ""
        self.PValue = 0
        self.Param = None
        self.isFitted = False

    def Fit(self, y):
        """Fit every candidate distribution to ``y`` and select the best.

        Args:
            y: the observed data, passed unchanged to the scipy ``fit``
                method and to ``scipy.stats.kstest``.

        Returns:
            A ``(distribution_name, p_value)`` tuple for the best fit.
        """
        # Start from a clean slate so repeated calls do not accumulate.
        self.dist_results = []
        self.params = {}
        for dist_name in self.dist_names:
            dist = getattr(scipy.stats, dist_name)
            param = dist.fit(y)
            self.params[dist_name] = param
            # Kolmogorov-Smirnov test of the data against the fitted
            # distribution; only the p-value is needed for ranking.
            _, p = scipy.stats.kstest(y, dist_name, args=param)
            self.dist_results.append((dist_name, p))
        # The best fit is the candidate with the highest p-value.
        sel_dist, p = max(self.dist_results, key=lambda item: item[1])
        # Store the name, p-value and parameters of the best fit.
        self.DistributionName = sel_dist
        self.PValue = p
        self.Param = self.params[sel_dist]
        self.isFitted = True
        return self.DistributionName, self.PValue
I understand that `y` is my DataFrame with my sales data.
However, I don't understand what the `self` argument is.
I tried this:
self = pd.DataFrame(columns=['dist_names','dist_results','params','DistributionName','PValue','Param','isFitted'])
But it did not work. Can someone help me please?