I am trying to generate a large number (~1.7 million) of images (candlestick charts with volume) for a CNN. However, the memory usage of my current script keeps growing by about 2–5 MB per iteration, as far as I can tell. It keeps increasing until my memory is completely full, no matter how many instances of the script I am running (I have 16 GB, of which the script eventually uses 11–12 GB).
The goal is to run multiple instances of the script at the same time. I tried parallel processing, but that did not work out well, so I am simply using multiple kernels instead. I have tried many ways to reduce the memory usage, but nothing seems to work.
I am using Jupyter notebooks (Python 3.8.5, Anaconda) in VS Code on a 64-bit Windows system with 16 GB of RAM and an 8th-gen Intel i7.
The first cell imports the packages, loads the data and sets the parameters.
# import required packages
import matplotlib.dates as mpdates
import matplotlib.pyplot as plt
import mplfinance as mpf
import matplotlib as mpl
from PIL import Image
import pandas as pd
import math as math
import numpy as np
import io as io
import gc as gc
import os as os
# Index (1-based) of this script instance; each instance processes its own
# contiguous slice of the symbols.
run = 1
# Number of instances the symbols are divided between.
n_runs = 6
# Timeframe: number of rows drawn in each chart window.
tf = 20
# Edge length, in pixels, of the (square) saved image.
img_size = 56
# Colours for rising candles, falling candles and the volume bars.
col_up = '#00FF00'
col_down = '#FF0000'
col_vol = "#0000FF"
# Root output directory; the images are stored in one sub-folder per symbol.
direct = "C:/Users/robin/1 - Scriptie/images/"
# Load the data: the first two rows form a two-level column header and the
# first column becomes the index, which is then parsed as datetimes.
data1 = pd.read_csv(
    r'D:\1 - School\Econometrics\2020 - 2021\Scriptie\Explainable AI\Scripts\Data\test_data.csv',
    header=[0, 1],
    index_col=0,
)
data1.index = pd.to_datetime(data1.index)
# Subset the symbols (level 0 of the column header) for this instance.
all_symbols = data1.columns.unique(level=0)
total_symbols = len(all_symbols) // n_runs
# The integer division above leaves up to n_runs - 1 symbols unassigned, so
# the last instance extends its slice to the end to pick up the remainder.
_stop = len(all_symbols) if run == n_runs else run * total_symbols
symbols1 = all_symbols[(run - 1) * total_symbols:_stop]
# Plot parameters: candle edges and wicks inherit the up/down colours.
mc = mpf.make_marketcolors(up=col_up, down=col_down, edge='inherit',
                           volume=col_vol, wick='inherit')
s = mpf.make_mpf_style(marketcolors=mc)
The second cell defines the function used to plot the charts:
# creating candlestick chart with volume
def plot_candle(i, j, data, symbols, s, mc, direct, img_size, tf):
    """Render one candlestick-with-volume chart and save it as a resized PNG.

    The chart covers rows ``i - tf`` up to (but excluding) ``i`` of
    ``data[symbols[j]]``. It is rendered into an in-memory buffer, resized to
    ``img_size`` x ``img_size`` and written to
    ``<direct>/<symbol>/<i - tf + 1>.png``.

    Parameters:
        i: End row (exclusive) of the window to plot.
        j: Position of the symbol within ``symbols``.
        data: Frame indexed by symbol at the first column level.
        symbols: Sequence of symbol names.
        s: mplfinance style passed to ``mpf.plot``.
        mc: Unused here; kept so existing callers keep working.
        direct: Root output directory.
        img_size: Width and height of the saved image, in pixels.
        tf: Number of rows in the plotted window.
    """
    symbol = symbols[j]
    # Slice the window of `tf` rows that ends just before row i.
    data_temp = data[symbol][i - tf:i]
    # os.path.join avoids a doubled separator when `direct` already ends in "/".
    out_path = os.path.join(direct, str(symbol), str(i - tf + 1) + ".png")
    try:
        # The context managers release the buffer and the decoded image even
        # when plotting or saving raises.
        with io.BytesIO() as buf:
            save = dict(fname=buf, rc=(["boxplot.whiskerprops.linewidth", 10]),
                        pad_inches=0, bbox_inches='tight')
            mpf.plot(data_temp, savefig=save, type='candle', style=s,
                     volume=True, axisoff=True, figratio=(1, 1), closefig=True)
            buf.seek(0)
            with Image.open(buf) as full_img:
                im = full_img.resize((img_size, img_size))
                im.save(out_path, "PNG")
    finally:
        # Always drop any figures pyplot still tracks, including on failure.
        # NOTE(review): if memory still grows per call, the active matplotlib
        # backend may be holding on to figures - consider trying a
        # non-interactive backend; to be confirmed by profiling.
        plt.close("all")
The third cell loops through the data and calls the function defined in the second cell.
# Make sure the images root folder exists. The configured `direct` path is
# used (rather than a hard-coded copy of it), and exist_ok=True keeps this safe
# when several instances of the script start at the same time.
os.makedirs(direct, exist_ok=True)
# The number of rows does not change while looping, so look it up once.
n_rows = len(data1)
for j, symbol in enumerate(symbols1):
    # Make sure the symbol folder exists.
    symbol_dir = os.path.join(direct, str(symbol))
    os.makedirs(symbol_dir, exist_ok=True)
    for i in range(tf, n_rows):
        # Skip windows whose image has already been created, so an
        # interrupted run can be resumed.
        target = os.path.join(symbol_dir, str(i - tf + 1) + ".png")
        if os.path.exists(target):
            continue
        # Create the chart, then force a garbage-collection pass.
        plot_candle(i, j, data1, symbols1, s, mc, direct, img_size, tf)
        gc.collect()