I'm trying to add two optional arguments to a function that trains a GLM using the statsmodel
package. I used this question to guide the development of the function: How do I create a Python function with optional arguments?
Basically, I want to give the user the ability to use OR not use weights and offsets.
This is the function:
def model_train(df, formula, *args, **kwargs):
'''
run non discrete model
df = model set
formula = model formula
weight = column used for weights
offset = column used for offsets
'''
weight = kwargs.get(df[weight], None)
print(f"Weights initialized....Starting to intialize offsets")
offset_factor = kwargs.get(df[offset], None)
#print(f"Offset initialized....starting matrix development")
y, x = patsy.dmatrices(formula, df, return_type = 'dataframe')
print(f"Matrix done...starting to instantiate model")
glm = sm.GLM(y, x, family = sm.families.Poisson(), var_weights = weight, offset = offset_factor)
print(f"Model instantiated....starting to fit")
glm_results = glm.fit()
print("Model fit. If you are reading this, you're done. Run 'model_object'[0].summary() to get summary statistics")
return glm_results, x, y
This is the error it throws:
---------------------------------------------------------------------------
UnboundLocalError Traceback (most recent call last)
<ipython-input-34-0ce97f02e15e> in <module>
----> 1 model_80150 = model_train(df = train_model1, formula=formula_80150, weight = 'eunit', offset = None)
~\Documents\GitHub\Edit\run_model.py in model_train(df, formula, *args, **kwargs)
7 offset = column used for offsets
8 '''
----> 9 weight = kwargs.get(df[weight], None)
10 print(f"Weights initialized....Starting to intialize offsets")
11
UnboundLocalError: local variable 'weight' referenced before assignment
EDIT UPDATE:
I've tried the following with a TypeError: unsupported operand type(s) for &: 'NoneType' and 'str'
error
def model_train(df, formula, *args, **kwargs):
'''
run non discrete model
df = model set
formula = model formula
weight = column used for weights
offset = column used for offsets
'''
weight_value = kwargs.get('weight', None)
print(f"Weights initialized....Starting to intialize offsets")
offset_factor = kwargs.get('offset', None)
print(f"Offset initialized....starting matrix development")
y, x = patsy.dmatrices(formula, df, return_type = 'dataframe')
print(f"Matrix done...starting to instantiate model")
if weight_value == None:
glm = sm.GLM(y, x, family = sm.families.Poisson())
elif weight_value == None & offset_factor != None:
glm = sm.GLM(y, x, family = sm.families.Poisson(), offset = df[offset_factor])
elif weight_value != None and offset_factor == None:
glm = sm.GLM(y, x, family = sm.families.Poisson(), var_weights = df[weight_value])
else:
glm = sm.GLM(y, x, family = sm.families.Poisson(), var_weights = df[weight_value], offset = df[offset_factor])
print(f"Model instantiated....starting to fit")
glm_results = glm.fit()
print("Model fit. If you are reading this, you're done. Run 'model_object'[0].summary() to get summary statistics")
return glm_results, x, y