As stated in prior answers, it depends on whether the work is CPU-bound or I/O-bound, and also on whether you use threading or multiprocessing.

The examples below were run on a Raspberry Pi 3B (1.2 GHz, 4 cores) with Python 3.7.3, with other processes running (including htop).

- For this test, multiprocessing and threading had similar results for the I/O-bound workload, but multiprocessing was clearly more efficient than threading for the CPU-bound workload (CPython's GIL lets only one thread execute Python bytecode at a time, so CPU-bound threads cannot run in parallel).
Using threads (the threading scripts are not listed below; see the sketch at the end of this answer):

Typical Result:
    Starting 4000 cycles of io-bound threading
    Sequential run time: 39.15 seconds
    4 threads Parallel run time: 18.19 seconds
    2 threads Parallel twice - run time: 20.61 seconds

Typical Result:
    Starting 1000000 cycles of cpu-only threading
    Sequential run time: 9.39 seconds
    4 threads Parallel run time: 10.19 seconds
    2 threads Parallel twice - run time: 9.58 seconds
Using multiprocessing:

Typical Result:
    Starting 4000 cycles of io-bound processing
    Sequential - run time: 39.74 seconds
    4 procs Parallel - run time: 17.68 seconds
    2 procs Parallel twice - run time: 20.68 seconds

Typical Result:
    Starting 1000000 cycles of cpu-only processing
    Sequential run time: 9.24 seconds
    4 procs Parallel - run time: 2.59 seconds
    2 procs Parallel twice - run time: 4.76 seconds
compare_io_multiproc.py:
#!/usr/bin/env python3
# Compare single proc vs multiple procs execution for io bound operation
"""
Typical Result:
Starting 4000 cycles of io-bound processing
Sequential - run time: 39.74 seconds
4 procs Parallel - run time: 17.68 seconds
2 procs Parallel twice - run time: 20.68 seconds
"""
import time
import multiprocessing as mp

# one thousand cycles per worker call
cycles = 1 * 1000

def t():
    # read ~256 KB of random data per cycle; the work is dominated by the read() calls
    with open('/dev/urandom', 'rb') as f:
        for x in range(cycles):
            f.read(4 * 65535)

if __name__ == '__main__':
    print(" Starting {} cycles of io-bound processing".format(cycles*4))

    # sequential: four calls in the main process
    start_time = time.time()
    t()
    t()
    t()
    t()
    print(" Sequential - run time: %.2f seconds" % (time.time() - start_time))

    # four procs in parallel
    start_time = time.time()
    p1 = mp.Process(target=t)
    p2 = mp.Process(target=t)
    p3 = mp.Process(target=t)
    p4 = mp.Process(target=t)
    p1.start()
    p2.start()
    p3.start()
    p4.start()
    p1.join()
    p2.join()
    p3.join()
    p4.join()
    print(" 4 procs Parallel - run time: %.2f seconds" % (time.time() - start_time))

    # two procs in parallel, twice
    start_time = time.time()
    p1 = mp.Process(target=t)
    p2 = mp.Process(target=t)
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    p3 = mp.Process(target=t)
    p4 = mp.Process(target=t)
    p3.start()
    p4.start()
    p3.join()
    p4.join()
    print(" 2 procs Parallel twice - run time: %.2f seconds" % (time.time() - start_time))
compare_cpu_multiproc.py:
#!/usr/bin/env python3
# Compare single proc vs multiple procs execution for cpu bound operation
"""
Typical Result:
Starting 1000000 cycles of cpu-only processing
Sequential run time: 9.24 seconds
4 procs Parallel - run time: 2.59 seconds
2 procs Parallel twice - run time: 4.76 seconds
"""
import time
import multiprocessing as mp

# one million cycles per worker call
cycles = 1000 * 1000

def t():
    # pure-Python floating point work; no I/O, so this worker is CPU-bound
    for x in range(cycles):
        fdivision = cycles / 2.0
        fcomparison = (x > fdivision)
        faddition = fdivision + 1.0
        fsubtract = fdivision - 2.0
        fmultiply = fdivision * 2.0

if __name__ == '__main__':
    print(" Starting {} cycles of cpu-only processing".format(cycles))

    # sequential: four calls in the main process
    start_time = time.time()
    t()
    t()
    t()
    t()
    print(" Sequential run time: %.2f seconds" % (time.time() - start_time))

    # four procs in parallel
    start_time = time.time()
    p1 = mp.Process(target=t)
    p2 = mp.Process(target=t)
    p3 = mp.Process(target=t)
    p4 = mp.Process(target=t)
    p1.start()
    p2.start()
    p3.start()
    p4.start()
    p1.join()
    p2.join()
    p3.join()
    p4.join()
    print(" 4 procs Parallel - run time: %.2f seconds" % (time.time() - start_time))

    # two procs in parallel, twice
    start_time = time.time()
    p1 = mp.Process(target=t)
    p2 = mp.Process(target=t)
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    p3 = mp.Process(target=t)
    p4 = mp.Process(target=t)
    p3.start()
    p4.start()
    p3.join()
    p4.join()
    print(" 2 procs Parallel twice - run time: %.2f seconds" % (time.time() - start_time))