I am writing an object-oriented program in which I am trying to use multiprocessing. I was getting pickle errors because, by default, Python can serialize functions but not class methods. So I used the suggestion from "Can't pickle <type 'instancemethod'> when using python's multiprocessing Pool.map()", but the problem is that if I have lambda expressions inside my methods, it does not work. My sample code is as follows:
import numpy as np
from copy_reg import pickle
from types import MethodType
from multiprocessing.pool import ApplyResult
from _functools import partial
from _collections import defaultdict
class test(object):
    """Demo class that maps one of its own bound methods over a process pool.

    A bound method handed to ``multiprocessing`` is pickled together with the
    instance it is bound to, so every attribute stored on ``self`` has to be
    picklable for :meth:`testit` to work.
    """

    def __init__(self, words):
        """Store ``words`` on the instance."""
        self.words = words

    def parallel_function(self, f, processes=50):
        """Return a callable that maps ``f`` over a sequence in a worker pool.

        Args:
            f: Callable applied to every element. It is pickled and sent to
                the worker processes.
            processes: Number of worker processes. Defaults to 50, the value
                that used to be hard-coded; choose it according to the
                available cores.

        Returns:
            A ``partial`` that takes a single ``sequence`` argument and
            returns a NumPy array holding the non-``None`` results.
        """
        def easy_parallize(f, sequence):
            from multiprocessing import Pool
            pool = Pool(processes=processes)
            try:
                # Equivalent to: for i in sequence: result[i] = f(i)
                result = pool.map(f, sequence)
            except BaseException:
                # Do not leave worker processes behind when the map fails
                # (e.g. with a PicklingError); the error is re-raised as is.
                pool.terminate()
                raise
            else:
                pool.close()
            finally:
                pool.join()
            cleaned = [x for x in result if x is not None]
            return np.asarray(cleaned)

        return partial(easy_parallize, f)

    def dummy(self):
        """Attach a ``defaultdict`` whose missing keys default to ``1.0``.

        NOTE: the lambda used as default factory cannot be pickled by the
        standard pickle module. Once this method has been called, pickling
        the instance (which happens when a bound method is sent to the pool)
        fails with ``PicklingError``. A picklable factory such as a
        module-level function or ``partial(float, 1.)`` avoids that.
        """
        self.t = defaultdict(lambda: 1.)

    def test(self, a, b, x):
        """Print ``x`` and ``a``, then return ``x`` squared (``b`` is unused)."""
        # Single-argument parenthesised form prints the same on Python 2.
        print(x)
        print(a)
        return x * x

    def testit(self):
        """Run :meth:`test` over ``[1, 2, 3, 4, 5]`` in parallel.

        Returns:
            The array of squared values produced by the worker pool.
        """
        sequence = [1, 2, 3, 4, 5]
        f1 = partial(self.test, 'a', 'b')
        f_p = self.parallel_function(f1)
        results = f_p(sequence)
        return results
def _pickle_method(method):
    """Reduce a method object to picklable pieces (``copy_reg`` reducer).

    Args:
        method: The method object to pickle; its ``im_func``, ``im_self``
            and ``im_class`` attributes are read.

    Returns:
        A ``(callable, args)`` pair: ``_unpickle_method`` and the tuple
        ``(func_name, obj, cls)`` it needs to rebuild the method.
    """
    func_name = method.im_func.__name__
    obj = method.im_self
    cls = method.im_class
    # Private methods (``__name`` without trailing dunder) are stored in the
    # class __dict__ under their mangled name ``_Class__name``; look that
    # name up along the MRO so the unpickler can actually find the function.
    if func_name.startswith('__') and not func_name.endswith('__'):
        for klass in cls.mro():
            mangled = '_%s%s' % (klass.__name__.lstrip('_'), func_name)
            if mangled in klass.__dict__:
                func_name = mangled
                break
    return _unpickle_method, (func_name, obj, cls)
def _unpickle_method(func_name, obj, cls):
for cls in cls.mro():
try:
func = cls.__dict__[func_name]
except KeyError:
pass
else:
break
return func.__get__(obj, cls)
if __name__ == "__main__":
    # Register the reducer so that method objects can be pickled and
    # therefore handed to the multiprocessing pool.
    pickle(MethodType, _pickle_method, _unpickle_method)

    # Build the demo object, attach the lambda-backed defaultdict, then run
    # the parallel map.
    demo = test('fdfs')
    demo.dummy()
    demo.testit()
But I get the following error because of the lambda expression:
Traceback (most recent call last):
File "/home/ngoyal/work/nlp_source/language-change/test.py", line 76, in <module>
t.testit()
File "/home/ngoyal/work/nlp_source/language-change/test.py", line 51, in testit
results=f_p(sequence)
File "/home/ngoyal/work/nlp_source/language-change/test.py", line 28, in easy_parallize
result = pool.map(f, sequence) # for i in sequence: result[i] = f(i)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
raise self._value
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
Is there any straightforward way to tackle this without moving to some other package that uses dill or something similar? Can this be done with the standard Python libraries? (I am using Python 2.7.)