# Process a (fully materialised) list of file names in fixed-size batches.
batch_size = 500
filenames = glob(...)  # fill with your own details
nfiles = len(filenames)
# Step through the list one batch at a time. Slicing past the end of a list
# is safe, so the last batch is simply shorter when nfiles is not a multiple
# of batch_size -- no separate "remainder" pass is needed. range() with a
# step works on both Python 2 and Python 3.
for start in range(0, nfiles, batch_size):
    do_something_with(filenames[start:start + batch_size])
A version that uses a generator to take N elements at a time
from a possibly endless iterable:
def every(thing, n):
    """Yield successive lists of up to ``n`` items taken from ``thing``.

    every(ABCDEFG, 2) --> AB CD EF G

    ``thing`` may be any iterable, including a lazy or endless one; items
    are pulled only as each batch is requested. Every batch holds exactly
    ``n`` items except possibly the last, which holds whatever is left.
    An empty batch is never yielded. If ``n`` is less than 1, nothing is
    yielded at all.
    """
    from itertools import islice

    # A batch size below 1 can never produce a non-empty batch.
    if n < 1:
        return
    it = iter(thing)
    while True:
        # islice stops as soon as the iterator is exhausted, so the
        # underlying iterator is never polled again after it runs dry.
        batch = list(islice(it, n))
        if not batch:
            return
        yield batch
# glob.iglob returns an iterator instead of a list, so the file names are
# produced on demand rather than all being held in memory at once.
filenames_i = glob.iglob("...")
# Hand the names on in groups of up to 500.
for batch in every(filenames_i, 500):
    do_something_with(batch)
This would make the iteration over the batches themselves more concise (the `for batch in every(...)` loop
in this code snippet).