I am new to Stack Overflow. Is there a way to speed this code up with vectorization? I am not very advanced, so I am not sure how to do it. I am currently working on a CSV dataset which I import using pandas. There are a couple of functions that create labels and represent the data to check for biases. The code takes some time to load, and I am looking at options to speed it up as much as possible. Thanks.
def create_labels(self, sensitive=None, prompt=None):
    """Ask for a display label for every distinct group in a sensitive column.

    Args:
        sensitive: Key of the column in ``self.X_test`` that holds the group
            values. When omitted, the module-level name ``sensitive`` is
            used, which is what the original zero-argument call relied on.
        prompt: Callable that takes the prompt text and returns the label.
            Defaults to the built-in ``input``; pass another callable to
            supply labels without a terminal.

    Returns:
        dict mapping each distinct group value to the label given for it,
        in order of first appearance in the column.

    Raises:
        NameError: if ``sensitive`` is omitted and no module-level
            ``sensitive`` is defined.
    """
    if sensitive is None:
        # Backward compatibility: the method used to read a free variable.
        try:
            sensitive = globals()["sensitive"]
        except KeyError:
            raise NameError(
                "create_labels() needs a column name: pass 'sensitive' or "
                "define a module-level 'sensitive'"
            ) from None
    if prompt is None:
        # Resolved at call time so a replaced builtins.input is honoured.
        prompt = input

    sensitive_label = {}
    # dict.fromkeys de-duplicates like set() but keeps first-seen order,
    # so the prompts always appear in the same order for the same data.
    for group in dict.fromkeys(self.X_test[sensitive]):
        sensitive_label[group] = prompt(f"Please Enter Label for Group {group}: ")
    return sensitive_label
def representation(self, sensitive, labels, predictions):
    """Summarise how sensitive groups and target values are represented.

    Works on a copy of ``self.X_test`` extended with two columns:
    ``'p'`` (``predictions``) and ``'t'`` (``self.y_test``).

    Args:
        sensitive: Name of the column in ``self.X_test`` holding group values.
        labels: Dict mapping each group value to its display label.
        predictions: Values stored as column ``'p'`` of the working copy.

    Returns:
        A 4-tuple ``(cont_table, sens_df, fig, p)``:

        * ``cont_table`` -- ``tabulate`` rendering (``fancy_grid``) of the
          transposed, row-normalised group-by-target crosstab.
        * ``sens_df`` -- dict mapping each label to the rows of the working
          copy whose ``sensitive`` value equals that label's group.
        * ``fig`` -- one-row, two-column bar figure: group shares on the
          left, target-value shares on the right.
        * ``p`` -- second value returned by ``chi2_contingency`` for the
          group-by-target count table (the p-value).
    """
    full_table = self.X_test.copy()
    # These columns do not depend on the label being processed, so attach
    # them once; this also keeps 't' available when `labels` is empty.
    full_table['p'] = predictions
    full_table['t'] = self.y_test

    # One sub-table per labelled group.
    sens_df = {
        label: full_table[full_table[sensitive] == group]
        for group, label in labels.items()
    }

    contingency = pd.crosstab(full_table[sensitive], full_table['t'])
    contingency_pct = pd.crosstab(
        full_table[sensitive], full_table['t'], normalize='index'
    )
    # A single test is enough: the table does not change afterwards.
    _, p, _, _ = chi2_contingency(contingency)

    # Relative frequencies computed once instead of per label.
    group_share = self.X_test[sensitive].value_counts(normalize=True)
    target_share = self.y_test.value_counts(normalize=True)

    sens_rep = {label: group_share[group] for group, label in labels.items()}
    # NOTE(review): the target distribution is indexed with the *group*
    # keys, so this only works when group values also occur as target
    # values (e.g. both are 0/1) -- confirm that this is intended.
    labl_rep = {str(group): target_share[group] for group in labels}

    bar_colors = ['orange', 'blue']
    fig = make_subplots(rows=1, cols=2)
    for group, label in labels.items():
        fig.add_trace(
            go.Bar(showlegend=False, x=[label], y=[sens_rep[label]]),
            row=1,
            col=1,
        )
        fig.add_trace(
            go.Bar(
                showlegend=False,
                x=[str(group)],
                y=[labl_rep[str(group)]],
                # NOTE(review): requires group keys usable as index 0 or 1.
                marker_color=bar_colors[group],
            ),
            row=1,
            col=2,
        )

    # Look each header up by group value so it always sits above its own
    # column, whatever order `labels` happens to be in.
    headers = [labels.get(group, group) for group in contingency_pct.index]
    cont_table = tabulate(contingency_pct.T, headers=headers, tablefmt='fancy_grid')
    return cont_table, sens_df, fig, p
# sens_df: the dataset split into one table per sensitive-group label