I have a problem with a Python code when going through large CSV files.
Basically, the code inputs a CSV file and outputs another CSV file with some math calculations. The input file is really big (about 300,000 rows). The problem is that when I run the code, Python will automatically stop after a while. When I press Ctrl+C, the code starts running again. (The code takes about 7 minutes to run)
I am sure there is nothing wrong is the code, because when the CSV file is made smaller, the code runs properly. This problem only occurs when the file is large. Pressing Ctrl+C a few times throughout execution will produce the output file correctly. There is also no Error message throughout.
Although the file is big, it isn't big enough such that Python can't handle it. I am using Windows 10, Python 3.7.1, my computer RAM is 16GB. The CPU and Memory usage do not reach max when running the code.
import pandas as pd
import numpy as np
import black_scholes as bs
from datetime import date
import time
import glob
t0 = time.time()
## Initiate all the steps
def start():
global df,bs_list,errors
global r
files = glob.glob('./*.csv')
print(files)
for name in files:
t0 = time.time()
df = open_file(name[2:])
print(name[2:])
#df['DTE'] = df.apply(calc.time, axis=1)
df['DTE'] = np.vectorize(calc.dte)(df['DataDate'],df['Expiration'])
print('DTE column ... done')
r = calc.fed_rate(df.loc[10]['DataDate'][:10])
print('R calc ... done')
bs_list,errors = [],[]
#df['IV'] = df.apply(add.iv,axis=1)
df['IV'] = np.vectorize(add.iv) (df['Bid'],df['Ask'],df['Strike'],df['UnderlyingPrice'],df['DTE'],df['Type'])
print('IV calc ... done')
df['Delta'],df['Gamma'],df['Theta'],df['Vega'] = np.vectorize(add.greeks)(df['IV'],df['Bid'],df['Ask'],df['Strike'],df['UnderlyingPrice'],df['DTE'],df['Type'])
create_file(name[2:-4]+'_edited.csv',df)
print(name[2:-4]+'_edited.csv'+' ... Addition successful')
t1 = time.time()
total = t1-t0
print(total)
## Open .csv file using pandas
def open_file(filename):
df = pd.read_csv(filename)
df.columns = df.columns.str.replace(' ', '')
return df
def create_file(filename,df):
df.to_csv(filename, encoding='utf-8', index=False)
class calc:
## Calculate the DTE of all options
def dte(DataDate,Expiration):
start,end = DataDate[:10],Expiration
start,end = start.split('/'),end.split('/')
d0 = date(int(start[2]),int(start[0]),int(start[1]))
d1 = date(int(end[2]),int(end[0]),int(end[1]))
delta = d1 - d0
return delta.days
## Finds the risk-free interest rate of that day
def fed_rate(date):
file = pd.read_csv('fed_rate.csv')
date = date.split('/')
date[0] = date[0].replace('0','')
date[1] = date[1].replace('0','')
for index, row in file.iterrows():
fed_date = row['THE_DATE']
fed_date = fed_date.split('/')
if fed_date[2] == date[2] and fed_date[0] == date[0]:
return row['FEDFUNDS']
class add:
## Calculate IV of each option
def iv(bid,ask,strike,price,dte,c_p):
try:
bid,ask,strike,price,dte = float(bid),float(ask),float(strike),float(price),float(dte)
if bid <= 0.1:
return 0
elif ask <= 0.1:
return 0
option_price = (bid+ask)/2
bs_list.append([price,dte/365,strike,r/100,option_price,0])
print(len(bs_list))
if len(bs_list) == 100064 or len(bs_list) == 100065:
print(bs_list[-1])
if c_p == 'call':
iv = bs.call.vol(price,dte/365,strike,r/100,option_price,0)
elif c_p == 'put':
iv = bs.put.vol(price,dte/365,strike,r/100,option_price,0)
except:
try:
errors.append(['iv',price,dte/365,strike,r/100,option_price,0])
except:
pass
return 0
return iv
## Add Greeks to file
def greeks(iv,bid,ask,strike,price,dte,c_p):
if iv == 0:
return 0,0,0,0
try:
## Delta
if c_p == 'call':
delta = bs.greeks.delta(float(price),dte/365,float(strike),r/100,iv,0,'c')
elif c_p == 'put':
delta = bs.greeks.delta(float(price),dte/365,float(strike),r/100,iv,0,'p')
## Gamma
gamma = bs.greeks.gamma(float(price),dte/365,float(strike),r/100,iv,0)
## Theta
if c_p == 'call':
theta = bs.greeks.theta(float(price),dte/365,float(strike),r/100,iv,0,'c')
elif c_p == 'put':
theta = bs.greeks.theta(float(price),dte/365,float(strike),r/100,iv,0,'p')
## Vega
vega = bs.greeks.vega(float(price),dte/365,float(strike),r/100,iv,0)
except:
errors.append(['greeks',float(price),dte/365,float(strike),r/100,iv,0])
return 0
return delta,gamma,theta,vega
Any idea what kind of problem is this? Any help will be appreciated. Thanks!