
I am trying to debug a data preprocessor for training a CNN (it loads data in parallel with the training loop). It fails with a "too many open files" error once the dataset exceeds roughly 400 images; below roughly 400 images it works. It behaves like a resource leak: maybe too many files are being queued, or I'm failing to release something (PIL? But as far as I know it closes files on its own). Here's the code.

import random
import StringIO               # Python 2 code: StringIO.StringIO, xrange and .next() are used below
import multiprocessing as mp

import numpy as np
from PIL import Image


def buffered_gen_mp(pil_img_gen, buffer_size=2):
    """
    Generator that runs a slow source generator in a separate process.
    buffer_size: the maximal number of items to pre-generate (length of the buffer)
    """

    buffer = mp.Queue(maxsize=buffer_size-1)

    def _buffered_generation_process(pil_img_gen_, buffer):
        for (img_fname,img),(limg_fname,limg) in pil_img_gen_:
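            # encode both PIL images as PNG byte strings so only plain bytes go through the Queue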

            sio = StringIO.StringIO()
            img.save(sio, 'PNG')
            img_enc = sio.getvalue()
            sio.close()
            sio = StringIO.StringIO()
            limg.save(sio, 'PNG')
            limg_enc = sio.getvalue()
            sio.close()

            buffer.put(((img_fname,img_enc),(limg_fname,limg_enc)), block=True)

        buffer.put(None) # sentinel: signal the end of the iterator
        buffer.close() 

    process = mp.Process(target=_buffered_generation_process, args=(pil_img_gen, buffer))
    process.start()

    for data in iter(buffer.get, None):
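        # each queued item holds PNG-encoded bytes; decode them back into PIL images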
        (img_fname,img_enc),(limg_fname,limg_enc) = data
        img = Image.open(StringIO.StringIO(img_enc))
        limg = Image.open(StringIO.StringIO(limg_enc))
        yield ((img_fname,img),(limg_fname,limg))


def ImageFnameGen(data_dir, img=True, label=True, depth=False, disp=False):
    for inst_dir in sorted(data_dir.dirs()):
        out = []
        img_fname = inst_dir/'image.jpg'
        limg_fname = inst_dir/'labels.png'
        if img:
            out.append(img_fname)
        if label:
            out.append(limg_fname)
        yield out

def PilImageGen(img_fname_g):
    for fnames in img_fname_g:
        out = []
        for fname in fnames:
            out.append((fname,Image.open(str(fname))))
        yield out


def ScaledImageGen(cfg, data_dir=None, randomize=True, loop=True):
    img_fname_gen = ImageFnameGen(data_dir)
    pil_img_gen = PilImageGen(img_fname_gen)
    out = []
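    # build the full list of (resized) image pairs up front, then cycle over it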
    for (img_fname,img),(limg_fname,limg) in pil_img_gen:
        # resize img  and limg
        out.append(((img_fname,img),(limg_fname,limg)))
    while True:
        if randomize:
            random.shuffle(out)

        for item in out:
            yield item

        if not loop:
            break


def GeomJitImageGen(cfg, scaled_img_gen):
    #stuff
    while True:
        for (img_fname, img), (limg_fname, limg) in scaled_img_gen:
            # do some stuff
            img = np.asarray(img).astype(np.double)
            limg = np.asarray(limg).astype(np.double)

            wimg = warper_x(img).clip(0,255).astype(np.uint8)
            wlimg = warper_y(limg).clip(0,255).astype(np.uint8)
            yield (img_fname, Image.fromarray(wimg)), (limg_fname, Image.fromarray(wlimg))


def PhotoJitImageGen(img_gen):
    # define weights, algorithm
    while True:
        for (img_fname, img), (limg_fname, limg) in img_gen:
            alg = np.random.choice(algorithms,p=weights)
            jimg = alg(img)
            yield (img_fname, jimg), (limg_fname, limg)


class Loader(object):
    def __init__(self, args, expt):
        # define self.cfg
        input_gen = buffered_gen_mp(PhotoJitImageGen(GeomJitImageGen(self.cfg, ScaledImageGen(self.cfg))), buffer_size=32*8)
        self.input_gen = input_gen
        # stuff

    def __call__(self, x_shared, y_shared):
        assert(len(x_shared)==len(y_shared))
        n = len(x_shared)
        for ix in xrange(n):
            (img_fname, pil_img), (limg_fname, pil_limg) = self.input_gen.next()
            img = np.asarray(pil_img)
            limg = np.asarray(pil_limg)
            # stuff
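
To check whether descriptors really are piling up, something like this could be printed from inside the loader (a minimal sketch, assuming Linux, where /proc/self/fd lists the current process's open descriptors; count_open_fds is my own helper name):

import os

def count_open_fds():
    # every entry in /proc/self/fd is one open file descriptor of this process
    return len(os.listdir('/proc/self/fd'))

Calling it every few batches should show whether the count grows with the number of images consumed.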
madratman
  • You could increase the maximum number of open files. However, this sounds indeed like a resource leak, so that will only be a workaround. Have you tried running this with `strace` to find out if the error is accurate? – Ulrich Eckhardt Apr 02 '16 at 11:33
  • I didn't know about strace. No idea what this means: `8224 munmap(0x7f190a3c5000, 33554432) = 0 8224 munmap(0x7f1907bc4000, 33554432) = 0 8224 munmap(0x7f1905bc4000, 33554432) = 0 8224 munmap(0x7f19033c3000, 33554432) = 0 8224 munmap(0x7f19003c1000, 33554432) = 0 8224 munmap(0x7f18fe3c1000, 33554432) = 0 8224 munmap(0x7f18fbbc0000, 33554432) = 0 8224 munmap(0x7f18f47fc000, 33554432) = 0 8224 munmap(0x7f18f27fc000, 33554432) = 0 8224 munmap(0x7f18ef7fa000, 33554432) = 0 8224 munmap(0x7f18ed7fa000, 33554432)` – madratman Apr 02 '16 at 11:48
  • ok, something here: http://stackoverflow.com/questions/6334515/how-do-i-interpret-strace-output. Not sure how to verify it with strace exactly, but it's definitely a leak; do you see anything fishy in the code? – madratman Apr 02 '16 at 11:50
  • I didn't look too much at the code, just for classic calls to `open()` without `with`. Anyhow, try with `strace -e 'open,close'` to find calls to the `open` and `close` syscalls. The result should give you the filedescriptors (indices) and according paths, telling you which files are opened and perhaps a hint how you can avoid opening too many of them. – Ulrich Eckhardt Apr 02 '16 at 13:03
  • Thanks, but I can't do it. Perks of being non-sudo. Anyway, I'll try something and see if I can fix it, or load the data in a simpler manner. – madratman Apr 03 '16 at 01:57
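
For reference, the per-process descriptor limit can also be inspected and raised up to the hard limit from Python without root, using the standard resource module (a sketch; as noted in the comments above, this only works around the leak rather than fixing it):

import resource

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print('RLIMIT_NOFILE: soft=%d hard=%d' % (soft, hard))

# an unprivileged process may raise its soft limit up to (but not beyond) the hard limit
resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))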

0 Answers