The following Python code measures the speedup obtained when increasing the number of processes. The task given to each process is simply to multiply a random matrix by itself repeatedly; the matrix size is also varied, and the corresponding elapsed time is measured.
Note that the processes do not share any objects and are completely independent. So I expected the performance curve, as a function of the number of processes, to be almost the same for all matrix sizes. However, when plotting the results (see below), I found that this expectation is false. Specifically, when the matrix size becomes large (80, 160), the performance hardly improves even though the number of processes is increased. Note: the figure's legend indicates the matrix sizes.
Could you explain why the performance does not improve when the matrix size is large?
For your information, here is the spec of my CPU: https://www.amd.com/en/products/cpu/amd-ryzen-9-3900x
Product Family: AMD Ryzen™ Processors
Product Line: AMD Ryzen™ 9 Desktop Processors
# of CPU Cores: 12
# of Threads: 24
Max. Boost Clock: Up to 4.6GHz
Base Clock: 3.8GHz
L1 Cache: 768KB
L2 Cache: 6MB
L3 Cache: 64MB
main script
import numpy as np
import pickle
from dataclasses import dataclass
import time
import multiprocessing
import os
import subprocess
import numpy as np
def split_number(n_total: int, n_split: int) -> list:
    """Split ``n_total`` into ``n_split`` integer shares that differ by at most one.

    The first ``n_total % n_split`` shares receive one extra unit, so the
    shares always sum to ``n_total``.

    Args:
        n_total: Total amount of work to distribute.
        n_split: Number of shares to produce; must be at least 1.

    Returns:
        A list of ``n_split`` integers summing to ``n_total``.

    Raises:
        ValueError: If ``n_split`` is smaller than 1. Without this check an
            empty list would be returned and all of the work silently dropped.
    """
    if n_split < 1:
        raise ValueError(f"n_split must be at least 1, got {n_split}")
    # Compute quotient and remainder once instead of once per share.
    base, remainder = divmod(n_total, n_split)
    return [base + 1 if position < remainder else base for position in range(n_split)]
def task(args):
    """Benchmark workload: repeatedly square freshly drawn random matrices.

    Args:
        args: A ``(n_iter, idx, matrix_size)`` tuple. ``n_iter`` is how many
            random matrices to draw, ``idx`` is the worker index (only used by
            the disabled CPU-pinning lines below) and ``matrix_size`` is the
            side length of each square matrix.

    Returns:
        None. The products are discarded; only the time spent matters.

    NOTE(review): NumPy's matrix product is usually backed by a BLAS library
    that may itself run multi-threaded for larger matrices. If so, the worker
    processes would compete for the same cores -- confirm against the BLAS
    build in use (e.g. by limiting it to one thread) before interpreting the
    scaling results.
    """
    repetitions, _worker_idx, side = args
    # Disabled experiment: pin each worker to its own pair of logical CPUs.
    # cores = "{},{}".format(2 * idx, 2 * idx+1)
    # os.system("taskset -p -c {} {}".format(cores, os.getpid()))
    for _round in range(repetitions):
        matrix = np.random.randn(side, side)
        for _step in range(100):
            # Square the running product in place of the previous value.
            matrix = np.dot(matrix, matrix)
def measure_time(n_process: int, matrix_size: int) -> float:
    """Time how long ``n_process`` workers take to finish a fixed amount of work.

    A total of 100 iterations of ``task`` is split as evenly as possible
    across the workers, so the overall amount of work does not depend on
    ``n_process``.

    Args:
        n_process: Number of worker processes in the pool.
        matrix_size: Side length of the square matrices each worker multiplies.

    Returns:
        Elapsed seconds for the ``pool.map`` call. Pool start-up happens
        before the clock starts and is therefore not included.
    """
    n_total = 100
    assigne_list = split_number(n_total, n_process)
    # Build the argument tuples up front so that only the parallel work is timed.
    jobs = list(zip(assigne_list, range(n_process), [matrix_size] * n_process))
    # The context manager shuts the pool down on exit; previously every call
    # left its worker processes behind.
    with multiprocessing.Pool(n_process) as pool:
        # perf_counter is monotonic and high-resolution, unlike time.time().
        ts = time.perf_counter()
        pool.map(task, jobs)
        elapsed = time.perf_counter() - ts
    return elapsed
if __name__ == "__main__":
    # Number of repeated measurements averaged per (matrix size, process count).
    n_experiment_sample = 5
    # os.cpu_count() may return None when the count cannot be determined.
    n_logical = os.cpu_count() or 1
    # Treat half of the logical CPUs as physical cores (two hardware threads
    # per core), but always benchmark at least one process.
    n_physical = max(1, n_logical // 2)

    # result[matrix_size][n_process] -> mean elapsed seconds
    result = {}
    for mat_size in [5, 10, 20, 40, 80, 160]:
        subresult = {}
        result[mat_size] = subresult
        for n_process in range(1, n_physical + 1):
            elapsed = np.mean(
                [measure_time(n_process, mat_size) for _ in range(n_experiment_sample)]
            )
            subresult[n_process] = elapsed
            print(f"{mat_size}, {n_process}, {elapsed}")

    # Persist the timings for the separate plot script.
    with open("result.pkl", "wb") as f:
        pickle.dump(result, f)
plot script
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Load the benchmark timings: {matrix_size: {n_process: mean elapsed seconds}}.
# NOTE: pickle must only be used on trusted files such as the one written by
# the benchmark script itself.
with open("result.pkl", "rb") as f:
    result = pickle.load(f)

fig, ax = plt.subplots()

# One curve per matrix size: speedup relative to the first recorded process count.
for matrix_size, timings in result.items():
    process_counts = list(timings)
    elapsed = np.array(list(timings.values()))
    ax.plot(process_counts, elapsed[0] / elapsed, label=matrix_size)

ax.set_xlabel("number of process")
ax.set_ylabel("speed up compared to single process")
ax.legend(loc="upper left", borderaxespad=0, fontsize=10, framealpha=1.0)
plt.show()