I'm writing a graph object to an xml representation. My monolithic code works well, but it's too slow on my large graph. I'm trying to parallelize it, but I'm not getting the SubElement
back from the pool. I'm sure that I'm missing something obvious, but I'm new to python.
import networkx as nx
import lxml.etree as et
from multiprocessing import Pool
G = nx.petersen_graph()
# For any graph, make a node subelement with the id being the node label
def getNodeAttributes(index):
et.SubElement(nodes, "node", attrib={'id': str(G.nodes()[index])})
# Do it with one monolithic process
network = et.Element("network", attrib={"name": "Petersen Graph"})
nodes = et.SubElement(network, "nodes")
for i in range(len(G)):
getNodeAttributes(i)
et.dump(network)
<network name="Petersen Graph">
<nodes>
<node id="0"/>
<node id="1"/>
<node id="2"/>
<node id="3"/>
<node id="4"/>
<node id="5"/>
<node id="6"/>
<node id="7"/>
<node id="8"/>
<node id="9"/>
</nodes>
</network>
# Do it again, but with pool.map in parallel
network = et.Element("network", attrib={"name": "Petersen Graph"})
nodes = et.SubElement(network, "nodes")
pool = Pool(4)
pool.map(getNodeAttributes, range(len(G)))
pool.close()
pool.join()
et.dump(network)
<network name="Petersen Graph">
<nodes/>
</network>