I'm trying to figure out how I can load thousands of numpy files using an iterator. To do this without overflowing my memory, I need to be able to open and close the files.
I have a list called train_data that holds the filenames of the numpy files. I can load a single numpy array using
# Load only the first file named in train_data.
np.load(train_data[0])
but if I try to load it using an iterator and the context manager (which according to this answer would help with memory, as it should close the file by itself after it is done with it), it throws an error.
# Iterable intended to walk over every filename in the train_data list,
# loading each file inside a `with` block.
class AbstractIterator(object):
def __iter__(self):
for abstract_filename in train_data:
# The traceback shown below is raised on this line (AttributeError: __enter__):
# the object returned by np.load here cannot be used in a `with` statement.
with np.load(abstract_filename) as a:
# NOTE(review): only the filename is printed; `a` is never used and nothing
# is yielded, so this method hands no items to the caller.
print(abstract_filename)
# Instantiate the iterable and print each item it produces.
iterator = AbstractIterator()
for i in iterator:
print(i)
Error message:
Traceback (most recent call last):
File "/Users/briennakh/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-53-12ff2d39b102>", line 8, in <module>
for i in AbstractIterator():
File "<ipython-input-53-12ff2d39b102>", line 5, in __iter__
with np.load(abstract_filename) as a:
AttributeError: __enter__
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/briennakh/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
stb = value._render_traceback_()
AttributeError: 'AttributeError' object has no attribute '_render_traceback_'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/briennakh/anaconda3/lib/python3.6/site-packages/IPython/core/ultratb.py", line 1151, in get_records
return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
File "/Users/briennakh/anaconda3/lib/python3.6/site-packages/IPython/core/ultratb.py", line 319, in wrapped
return f(*args, **kwargs)
File "/Users/briennakh/anaconda3/lib/python3.6/site-packages/IPython/core/ultratb.py", line 353, in _fixed_getinnerframes
records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
File "/Users/briennakh/anaconda3/lib/python3.6/inspect.py", line 1483, in getinnerframes
frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)
File "/Users/briennakh/anaconda3/lib/python3.6/inspect.py", line 1441, in getframeinfo
filename = getsourcefile(frame) or getfile(frame)
File "/Users/briennakh/anaconda3/lib/python3.6/inspect.py", line 696, in getsourcefile
if getattr(getmodule(object, filename), '__loader__', None) is not None:
File "/Users/briennakh/anaconda3/lib/python3.6/inspect.py", line 725, in getmodule
file = getabsfile(object, _filename)
File "/Users/briennakh/anaconda3/lib/python3.6/inspect.py", line 709, in getabsfile
return os.path.normcase(os.path.abspath(_filename))
File "/Users/briennakh/anaconda3/lib/python3.6/posixpath.py", line 376, in abspath
cwd = os.getcwd()
FileNotFoundError: [Errno 2] No such file or directory
How can I load and properly close the file after I'm done working with it, in an iterator?
EDIT: I need an actual iterator, not a workaround, because I'm using this with gensim, which takes an iterator.
I have tried to incorporate the answer into my code.
class DocumentIterator(object):
    """Re-iterable stream of ``TaggedDocument`` objects, one per numpy file.

    Files are loaded with ``np.load`` lazily, one at a time, as the consumer
    requests documents.  Each document's tag is the file's base name without
    its extension.

    Every call to ``iter()`` starts a new, independent pass over
    ``filenames``, so the object can be iterated repeatedly and nested or
    overlapping passes do not disturb one another.  ``next(obj)`` is still
    supported and advances the most recently started pass (or the pass
    created at construction time if ``iter()`` has not been called yet).
    """

    def __init__(self, filenames):
        """Remember *filenames*, the paths that will be handed to ``np.load``."""
        self.filenames = filenames
        # Kept so that next(obj) works even before iter(obj) is called.
        self.generator = self.load_documents()

    def load_documents(self):
        """Yield one ``TaggedDocument`` per entry in ``self.filenames``.

        Errors from ``np.load`` or from building the document propagate to
        the caller instead of being printed and swallowed.
        """
        for filename in self.filenames:
            loaded_file = np.load(filename)
            tag = os.path.splitext(os.path.basename(filename))[0]
            try:
                # Yield the document directly: keeping it in a local would
                # hold the loaded array alive until the next loop iteration.
                yield TaggedDocument(words=loaded_file, tags=[tag])
            finally:
                # Drop this generator's reference before loading the next
                # file; the array is freed once the consumer also lets go
                # of the yielded document.
                del loaded_file
                print('Closed ' + tag)

    def __iter__(self):
        """Start a fresh pass and return its generator."""
        # Return the generator itself rather than ``self`` so that each pass
        # has its own position; a later iter() call cannot reset this one.
        self.generator = self.load_documents()
        return self.generator

    def __next__(self):
        """Return the next document of the most recently started pass."""
        return next(self.generator)
This works.