I'm not sure this is the answer, and also I'm not sure you are still looking for the answer, but...
So you have 100,000 Python objects. If these objects are plain data (data sets) rather than instances of some class, pass the data as JSON strings. Something like this:
#!/usr/bin/env python
import json
import numpy as np
from mpi4py import MPI
# Minimal mpi4py example: the root rank hands one JSON-encoded parameter
# set to every rank, each rank evaluates a*x**2 + b on its own set, and
# the root collects the results.
comm = MPI.COMM_WORLD

# Only the root rank builds the work list; every other rank passes None
# to scatter().
if comm.rank == 0:
    tasks = [
        json.dumps({'a': 1, 'x': 2, 'b': 3}),
        json.dumps({'a': 3, 'x': 1, 'b': 2}),
        json.dumps({'a': 2, 'x': 3, 'b': 1}),
    ]
    n_tasks = len(tasks)
else:
    tasks = None
    n_tasks = None

# scatter() hands out exactly one item per rank, so the task count must
# equal the communicator size.  Broadcasting the count lets every rank
# reach the same verdict and exit together with a readable message
# instead of failing inside scatter().
n_tasks = comm.bcast(n_tasks, root=0)
if n_tasks != comm.size:
    raise SystemExit(
        "expected %d MPI ranks (one per task), got %d" % (n_tasks, comm.size)
    )

# Scatter the parameter sets: each rank receives one JSON string.
unit = comm.scatter(tasks, root=0)
p = json.loads(unit)

# Single-argument print() calls produce the same output on Python 2 and 3.
print("-" * 18)
print("-- I'm rank %d in %d size task" % (comm.rank, comm.size))
print("-- My paramters are: {}".format(p))
print("-" * 18)

# NOTE(review): presumably here to let every rank finish its banner
# before results are gathered; the collectives do not need it -- confirm.
comm.Barrier()

calc = p['a'] * p['x'] ** 2 + p['b']

# Gather results: the root gets a list ordered by rank, every other rank
# gets None, so no explicit "else: result = None" is needed.
result = comm.gather(calc, root=0)

# Do something with the result (root only).
if comm.rank == 0:
    print("the result is  {}".format(result))
Note that if you have only 8 nodes/cores, each scatter must be given exactly 8 records in the tasks
list, so you have to scatter and gather the 100,000 data sets sequentially, 8 at a time. If all your data sets are in a list
called ALLDATA, the code could look like this:
def calc(a=0, x=0, b=0):
    """Return ``a * x**2 + b``.

    Every parameter defaults to 0, so the function can be called with a
    partial keyword set such as ``calc(**params)``.
    """
    return a * x ** 2 + b
# Process ALLDATA in batches of comm.size records: scatter one record to
# each rank, evaluate calc() on it, and gather the results on the root.
if comm.rank == 0:
    records = list(ALLDATA)
    collector = []
    # Ceiling division: a final, partially filled batch is still processed
    # rather than silently dropped.
    n_batches = (len(records) + comm.size - 1) // comm.size
else:
    records = None
    n_batches = None

# Every rank must run the same number of scatter/gather rounds, so the
# root shares the batch count; only the root needs ALLDATA itself.
n_batches = comm.bcast(n_batches, root=0)

for batch_no in range(n_batches):
    if comm.rank == 0:
        start = batch_no * comm.size
        batch = records[start:start + comm.size]
        task = [json.dumps(s) for s in batch]
        # scatter() needs exactly one item per rank: pad a short final
        # batch with JSON null placeholders.
        task += [json.dumps(None)] * (comm.size - len(batch))
    else:
        task = None

    unit = comm.scatter(task, root=0)
    p = json.loads(unit)
    # A rank that received a placeholder has nothing to compute.
    res = json.dumps(calc(**p) if p is not None else None)

    totres = comm.gather(res, root=0)
    if comm.rank == 0:
        # Results arrive ordered by rank; keep only those that belong to
        # real records and discard the padding.
        collector.extend(json.loads(x) for x in totres[:len(batch)])

if comm.rank == 0:
    # Single-argument print() gives the same output on Python 2 and 3.
    print("the result is  {}".format(collector))