I am running the following code:
def get_previous_next_returns(portfolio, total_returns):
    assets = []
    i = 0
    for asset in portfolio:
        i += 1
        try:
            for offset in [1, 5, 15, 30, 45, 60, 75, 90, 120, 150,
                           200, 250, 500, 750, 1000, 1250, 1500]:
                print(i, asset.name, offset)
                asset['return_stock'] = (asset.Close - asset.Close.shift(1)) / (asset.Close.shift(1))
                merged_data = pd.merge(asset, sp_500, on='Date')
                total_positive_days = 0
                total_beating_sp_days = 0
                total_days = offset
                for index in range(0, len(merged_data)):
                    if index - offset > 0:
                        # for index, row in merged_data.iterrows():
                        # print(offset, index)
                        sliced = merged_data.iloc[index - offset : index]
                        total_positive_days = (sliced.Close_x > sliced.Close_x.shift(1)).sum()
                        total_beating_sp_days = (sliced.return_stock > sliced.return_sp).sum()
                        percentage_of_positive_days = float(total_positive_days / total_days)
                        percentage_of_beating_days = float(total_beating_sp_days / total_days)
                        asset.loc[index, 'Pct_positive_' + str(offset)] = percentage_of_positive_days
                        asset.loc[index, 'Pct_beating_' + str(offset)] = percentage_of_beating_days
                # previous period returns
                asset['Pct_change_' + str(offset)] = asset['Close'].pct_change(periods=offset)
                # next period returns
                asset['Pct_change_plus_' + str(offset)] = asset['Close'].pct_change(periods=-offset)
            assets.append(asset)
            total_returns = total_returns.append(asset)
        except IndexError:
            print("Index error")
    return assets, total_returns
The problem is that the dataframe I am running it on (merged_data) is very large (over 1 million rows), so the code takes many hours to complete. Is there a way to speed it up, e.g. by replacing the inner for loop with a more efficient, vectorized block?
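For example, would something along these lines be the right direction? This is only a sketch of what I had in mind (the helper name add_rolling_stats is just for illustration): I'm assuming the merged frame has the Close_x, return_stock and return_sp columns from above, and I'm not certain the window edges line up exactly with my manual slicing, since my slice recomputes shift(1) inside each window so its first row is always counted as False.

    import pandas as pd

    def add_rolling_stats(merged_data, offset):
        # sketch only: compute the boolean flags once over the whole frame
        up_day = (merged_data['Close_x'] > merged_data['Close_x'].shift(1)).astype(float)
        beat_sp = (merged_data['return_stock'] > merged_data['return_sp']).astype(float)

        # rolling count over the previous `offset` rows; shift(1) so the window
        # ends at index - 1, roughly matching iloc[index - offset : index]
        merged_data['Pct_positive_' + str(offset)] = up_day.rolling(offset).sum().shift(1) / offset
        merged_data['Pct_beating_' + str(offset)] = beat_sp.rolling(offset).sum().shift(1) / offset
        return merged_data

    for offset in [1, 5, 15, 30, 45, 60]:  # etc.
        merged_data = add_rolling_stats(merged_data, offset)

That would do one rolling pass per offset instead of one slice per row, but I'm not sure it is fully equivalent at the window boundaries or whether it is the best approach here.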