I am currently trying to store some data into .h5 files, I quickly realised that might have to store my data into parts, as it is not possible to process it an have in my ram. I started out using numpy.array
to compress the memory usage, but that resulted in days spend on formatting data.
So i went back to use list
, but made the program monitor the memory usage,
when it was above a specified value, will a part be stored, as a numpy
format - such that a another process can load it and make use of it. Problem with doing this, is that what I thought would release my memory isn't releasing the memory. For some reason is the memory the same even though I reset the variable and del
the variable. Why isn't the memory being released here?
import numpy as np
import os
import resource
import sys
import gc
import math
import h5py
import SecureString
import objgraph
from numpy.lib.stride_tricks import as_strided as ast
total_frames = 15
total_frames_with_deltas = total_frames*3
dim = 40
window_height = 5
def store_file(file_name,data):
with h5py.File(file_name,'w') as f:
f["train_input"] = np.concatenate(data,axis=1)
def load_data_overlap(saved):
#os.chdir(numpy_train)
print "Inside function!..."
if saved == False:
train_files = np.random.randint(255,size=(1,40,690,4))
train_input_data_interweawed_normalized = []
print "Storing train pic to numpy"
part = 0
for i in xrange(100000):
print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
if resource.getrusage(resource.RUSAGE_SELF).ru_maxrss > 2298842112/10:
print "Max ram storing part: " + str(part) + " At entry: " + str(i)
print "Storing Train input"
file_name = 'train_input_'+'part_'+str(part)+'_'+str(dim)+'_'+str(total_frames_with_deltas)+'_window_height_'+str(window_height)+'.h5'
store_file(file_name,train_input_data_interweawed_normalized)
part = part + 1
del train_input_data_interweawed_normalized
gc.collect()
train_input_data_interweawed_normalized = []
raw_input("something")
for plot in train_files:
overlaps_reshaped = np.random.randint(10,size=(45,200,5,3))
for ind_plot in overlaps_reshaped.reshape(overlaps_reshaped.shape[1],overlaps_reshaped.shape[0],overlaps_reshaped.shape[2],overlaps_reshaped.shape[3]):
ind_plot_reshaped = ind_plot.reshape(ind_plot.shape[0],1,ind_plot.shape[1],ind_plot.shape[2])
train_input_data_interweawed_normalized.append(ind_plot_reshaped)
print len(train_input_data_interweawed_normalized)
return train_input_data_interweawed_normalized_print
#------------------------------------------------------------------------------------------------------------------------------------------------------------
saved = False
train_input = load_data_overlap(saved)
output:
.....
223662080
224772096
225882112
226996224
228106240
229216256
230326272
Max ram storing part: 0 At entry: 135
Storing Train input
something
377118720
Max ram storing part: 1 At entry: 136
Storing Train input
something
377118720
Max ram storing part: 2 At entry: 137
Storing Train input
something