Here's a minimal example of what I'm trying to parallelize:

import numpy as np

def generate_function(a):
    def func(x):
        '''a complex function that uses several modules'''
        return x + np.sqrt(a)
    return func

if __name__ == '__main__':
    f = generate_function(0.5)
    x = np.arange(0, 100)
    y = np.array(list(map(f, x)))  # want to parallelize this step
With multiprocessing, the nested func causes problems, since pickle can't serialize nested functions:
import multiprocessing as mp
...
pool = mp.Pool(2)
y = np.array(pool.map(f, x))

AttributeError: Can't pickle local object 'generate_function.<locals>.func'
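For reference, the usual workaround with plain multiprocessing is to replace the closure with a top-level callable class, since instances of importable classes pickle fine. A minimal sketch (GeneratedFunction is a made-up name, not from my real code):

import numpy as np
import multiprocessing as mp

class GeneratedFunction:
    '''Picklable stand-in for the closure: the state a lives on the instance.'''
    def __init__(self, a):
        self.a = a
    def __call__(self, x):
        return x + np.sqrt(self.a)

if __name__ == '__main__':
    f = GeneratedFunction(0.5)
    x = np.arange(0, 100)
    with mp.Pool(2) as pool:
        # works: the class is importable at top level, so instances pickle,
        # and the workers re-import numpy when they import this module
        y = np.array(pool.map(f, x))

But that turns every generated function into a class of its own, which scales badly to the nested case below.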
Even with pathos, which can serialize nested functions via dill, the modules used inside func are not imported on the workers:
import pathos
...
pool = pathos.multiprocessing.ProcessPool(2)
y = np.array(pool.map(f, x))

NameError: name 'np' is not defined
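As far as I can tell, the serialization itself succeeds and it's the lookup of the global name np inside func that fails on the workers. A minimal sketch of what I mean:

import dill
import numpy as np

def generate_function(a):
    def func(x):
        return x + np.sqrt(a)
    return func

f = generate_function(0.5)
payload = dill.dumps(f)   # succeeds where pickle raises AttributeError
g = dill.loads(payload)
print(g(4.0))             # fine here only because np is defined in this process;
                          # in a fresh worker, resolving the global name np inside
                          # func is what raises the NameError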
Note that none of the solutions on the related question "Python multiprocessing PicklingError: Can't pickle <type 'function'>" work here either.
What's the best way to parallelize this?
So it is possible to get pathos to work by re-importing numpy inside generate_function:
def generate_function(a):
    import numpy as np
    def func(x):
        '''a complex function that uses several modules'''
        return x + np.sqrt(a)
    return func
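With that change, the pathos example from above runs. A quick check; my understanding is that np is now a local of generate_function captured in func's closure, so dill ships it (as a module reference) to the workers:

import numpy as np
import pathos

if __name__ == '__main__':
    f = generate_function(0.5)     # the version that imports numpy internally
    x = np.arange(0, 100)
    pool = pathos.multiprocessing.ProcessPool(2)
    y = np.array(pool.map(f, x))   # no NameError: np travels as a closure cell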
but I may have several imports across multiple generate_functions with multiple layers of nesting, and it will quickly get cumbersome to keep track of all that, so I would like to avoid this kind of mess:
def generate_function1(a):
    import module1, module2, module3
    from module4 import a, b
    from module5 import c as d
    from module6 import e as f
    def func(x):
        ...
    return func

def generate_function2(a):
    import module1, module2, module3
    from module4 import a, b
    from module5 import c as d
    from module6 import e as f
    def func(x):
        ...
    return func

def generate_generator_function(a):
    import module1, module2, module3
    from module4 import a, b
    from module5 import c as d
    from module6 import e as f
    def generate_function(a):
        import module1, module2, module3
        from module4 import a, b
        from module5 import c as d
        from module6 import e as f
        def func(x):
            ...
        return func
    return generate_function
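The closest I've come to reducing the duplication is to centralize the imports in one helper that each factory calls once. A minimal sketch, assuming dill can serialize module references held in a closure cell (the helper _imports and the namespace m are my own invention, not part of the code above):

import types

def _imports():
    '''Hypothetical helper: the single place where the shared imports live.'''
    import numpy as np
    import math
    return types.SimpleNamespace(np=np, math=math)

def generate_function(a):
    m = _imports()                 # m is captured in func's closure
    def func(x):
        '''a complex function that uses several modules'''
        return x + m.np.sqrt(a)
    return func

Each nested layer still needs its own m = _imports() line, but at least the import list itself lives in one place. Is there a better way?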