I am trying to run several data frames through a pipeline to permanently alter each data frame but the changes are not persisting outside of the for loop. Can someone tell me the correct syntax to do this? All edits assign and return a data frame as edit_1g() does. Thank you.
# create pipeline to preprocess the data:
def pipeline_1(df):
df1=(df.pipe(edit_2a)
.pipe(edit_2b)
.pipe(edit_2d)
.pipe(edit_2e)
.pipe(edit_1f)
.pipe(edit_1j)
.pipe(edit_1g)
.pipe(edit_2h)
)
return df1
# list the data frames we want to run through our pipeline:
dfs = {'df_orders':df_orders, 'df_accts_summary':df_accts_summary, 'df_accts1':df_accts1,
'df_traders_summary':df_traders_summary, 'df_traders1':df_traders1,
'df_tag76_summary':df_tag76_summary, 'df_tag761':df_tag761}
print('data frames altered via pipeline_1: \n')
for key, values in dfs.items():
values = pipeline_1(values) # changes aren't persisting outside of the loop
print(key + ' ' + str(values.shape))
# round the decimals of columns:
def edit_1g(df):
d = {'icpwp10bp':0, 'icpwp2bp':0, 'icslippagebpbp':0, 'participationrate':0, 'adv':1, 'twodprioris':0,
'twodpostis':0, 'orderval':0, 'valuedark':0, 'mktvalflt':0, 'numberoffills': 0, 'size':0,
'lmtadjintvwap':0, 'fivedsprd':0, 'tendvol':0
}
df = df.round(d)
return df