While using Python 3 on Windows 7 to process some large csv files, I have run into an issue with the program not running fast enough. The original working version of the code is similar to the code below, except that both of the process calls were threads. After adding the multiprocessing library and converting the tdg.Thread calls to mp.Process as shown below, I receive this pickling error:
  line 70, in <module>
    proc1.start()
  File "C:\Python34\lib\multiprocessing\process.py", line 105, in start
    self._popen = self._Popen(self)
  File "C:\Python34\lib\multiprocessing\context.py", line 212, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Python34\lib\multiprocessing\context.py", line 313, in _Popen
    return Popen(process_obj)
  File "C:\Python34\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Python34\lib\multiprocessing\reduction.py", line 59, in dump
    ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '_thread.lock'>: attribute lookup lock on _thread failed
Code:
import multiprocessing as mp
import threading as tdg
import queue as q


def my_p1func1(data, Q):
    # performs LDAP for data set 1
    print("p1f1:", data)
    Q.put(data)


def my_p1func2(data, Q):
    # performs LDAP for data set 2
    print("p1f2:", data)
    Q.put(data)


def my_proc1(data, Q):
    # runs the two LDAP workers as threads inside this process
    f1_Q = q.Queue()
    f2_Q = q.Queue()
    f1 = tdg.Thread(target=my_p1func1, args=(data['1'], f1_Q))
    f2 = tdg.Thread(target=my_p1func2, args=(data['2'], f2_Q))
    f1.start()
    f2.start()
    f1.join()
    f2.join()
    f1_out = f1_Q.get()
    f2_out = f2_Q.get()
    Q.put({'f1': f1_out, 'f2': f2_out})


def my_p2func1(data, Q):
    # performs gethostbyaddr() for data set 1
    print("p2f1:", data)
    Q.put(data)


def my_p2func2(data, Q):
    # performs gethostbyaddr() for data set 2
    print("p2f2:", data)
    Q.put(data)


def my_proc2(data, Q):
    # runs the two gethostbyaddr() workers as threads inside this process
    f1_Q = q.Queue()
    f2_Q = q.Queue()
    f1 = tdg.Thread(target=my_p2func1, args=(data['1'], f1_Q))
    f2 = tdg.Thread(target=my_p2func2, args=(data['2'], f2_Q))
    f1.start()
    f2.start()
    f1.join()
    f2.join()
    f1_out = f1_Q.get()
    f2_out = f2_Q.get()
    Q.put({'f1': f1_out, 'f2': f2_out})


dataIn = {'1': [1, 2, 3], '2': ['a', 'b', 'c']}

# queues meant to collect the results from each process
pq1 = q.Queue()
pq2 = q.Queue()

proc1 = mp.Process(target=my_proc1, args=(dataIn, pq1))
proc2 = mp.Process(target=my_proc2, args=(dataIn, pq2))

proc1.start()
proc2.start()
proc1.join()
proc2.join()

p1 = pq1.get()
p2 = pq2.get()

print(p1)
print(p2)
I thought the issue was being caused by the Locks I had around my print statements, but even after removing them it continues to throw the same pickling error.
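If it helps, I believe a stripped-down version like the sketch below (the worker function and names are just for illustration) hits the same pickling error on Windows. That makes me suspect it is the queue.Queue objects I pass in args that are being pickled, since they are built on _thread.lock objects internally:

import multiprocessing as mp
import queue as q

def worker(data, out_q):
    # illustrative worker; just echoes the data back through the queue
    out_q.put(data)

if __name__ == '__main__':
    plain_q = q.Queue()  # thread queue; internally holds _thread.lock objects
    p = mp.Process(target=worker, args=([1, 2, 3], plain_q))
    p.start()            # on Windows the Process and its args are pickled here
    p.join()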
I am in over my head with this and would appreciate any help understanding why it is attempting to pickle something I am not using, and how I can get this running so that it is more efficient.