I am trying to understand how can I use threads inside the mutiple process, So that I could take full advatage of all my cores.
import multiprocessing
from time import sleep
import time
import queue
class Work:
"""
Dummy class to represent some intensive work
"""
def __init__(self,a,b):
self.a=a
self.b=b
def do(self):
sleep_time=5
sleep(sleep_time)
print("slept for {}".format(sleep_time))
print("process name :{}".format(multiprocessing.current_process().name))
print("Thread name :{}".format(threading.current_thread().name))
return self.a+self.b
class ThreadWrapper(threading.Thread):
def __init__(self,obj,method_name):
super().__init__()
self.obj=obj
self.method_name=method_name
self.result=None
def run(self):
try:
self.result=getattr(self.obj,self.method_name)()
except Exception as e:
print("Exception occured {}".format(e))
def get_result(self):
return self.result
def main(work_items,result_queue,kls,method_name):
threads=[]
for item in work_items:
obj=kls(*item)
threads.append(ThreadWrapper(obj,method_name))
for thread in threads:
thread.start()
for thread in threads:
thread.join()
for thread in threads:
if not thread.is_alive():
result=thread.get_result()
result_queue.put(result)
def threads_only():
list_of_items=[(1,2),(3,4),(5,6),(1,2),(3,4),(5,6),(1,2),(3,4),(5,6)]
result_queue=multiprocessing.Queue()
main(list_of_items,result_queue,Work,'do')
while not result_queue.empty():
print(result_queue.get())
Case-1 ----> using both Multiprocessing and threading
if __name__ == '__main__':
t1=time.time()
result_queue=multiprocessing.Queue()
list_of_items=[[(1,2),(3,4),(5,6)],[(1,2),(3,4),(5,6)],[(1,2),(3,4),(5,6)]]
processes = []
for items in list_of_items :
p = multiprocessing.Process(target=main,args=(items,result_queue,Work,'do')) # create a new Process
processes.append(p)
for process in processes:
process.start()
process.join()
while not result_queue.empty():
print(result_queue.get())
t2=time.time()
print("Total time taken is {}".format(t2-t1)) # Total time taken is 16.146932125091553
Case-2 Using only threading
if __name__ == '__main__':
t1=time.time()
threads_only()
t2=time.time()
print("Total time taken is {}".format(t2-t1)) # Total time taken is 5.06544041633606
Here is the summary:
Total_work_input=9
1.Without any concurrency Total_time_taken= 9*5s(sleep time)+ ~1s (actual work) =~ 46s
2.Case-1 (multiprocessing + threads) = 16.146932125091553s
3.Case-2 (Only Threads) = 5.06544041633606
I have 4 cores in my system,
os.cpu_count() 4
Shouldn't each process be given to each core ???
So that I should be getting similar result as Case-2 (~5 sec) where I have used only threading
What I am missing here?
Please help me understand