I was trying to speed up the following code using multiprocessing. So I used this code, after getting some help here:
from multiprocessing import Pool, Manager, cpu_count
from functools import partial
import re
def process_value(data_set_N, i_1):
    print(i_1)
    for i_2 in list(range(10))+list("+-*/")+["=="]:
        for i_3 in list(range(10))+list("+-*/")+["=="]:
            for i_4 in list(range(10))+list("+-*/")+["=="]:
                for i_5 in list(range(10))+list("+-*/")+["=="]:
                    for i_6 in list(range(10))+list("+-*/")+["=="]:
                        for i_7 in list(range(10))+list("+-*/")+["=="]:
                            for i_8 in list(range(10)):
                                try:
                                    value = str(i_1)+str(i_2)+str(i_3)+str(i_4)+str(i_5)+str(i_6)+str(i_7)+str(i_8)
                                    if '//' in value:
                                        continue
                                    valuev = re.sub(r'\b0+(?!\b)', '', value)
                                    evaluation = eval(valuev)
                                    if type(evaluation) == type(True) and evaluation:
                                        data_set_N.append(value)
                                except:
                                    continue
if __name__ == '__main__':
    with Manager() as manager:
        data_set_N = manager.list()
        # The iterable is the i_1 list:
        i_1_list = list(range(10))+list("+-")
        POOL_SIZE = min(cpu_count(), len(i_1_list))
        pool = Pool(POOL_SIZE)
        pool.map(partial(process_value, data_set_N), i_1_list)
        pool.close()
        pool.join()
        data_set_N = list(data_set_N)
        print(len(data_set_N))
This works, but it is supposed to take less time (roughly the original time divided by the number of CPUs). After waiting around 24 hours, the code was still executing.
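For scale, here is a rough count of how many candidate strings the 8-loop version has to push through re.sub and eval. This is just the product of the loop lengths above, a back-of-the-envelope check I did by hand, not something the script computes:

# Back-of-the-envelope size of the 8-loop search space, multiplying the
# lengths of the loop iterables from the code above.
i_1_len   = len(list(range(10)) + list("+-"))               # 12
inner_len = len(list(range(10)) + list("+-*/") + ["=="])    # 15, used for i_2..i_7
i_8_len   = len(list(range(10)))                            # 10
print(i_1_len * inner_len ** 6 * i_8_len)   # 1_366_875_000 candidate strings

So even with a perfect 12x speedup, each worker still has to evaluate well over a hundred million strings.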
So I tested the code to compare it (with and without multiprocessing), using fewer nested loops (6 instead of 8).
from multiprocessing import Pool, Manager, cpu_count
from functools import partial
import re
def process_value(data_set_N, i_1):
    for i_2 in list(range(10))+list("+-*/")+["=="]:
        for i_3 in list(range(10))+list("+-*/")+["=="]:
            for i_4 in list(range(10))+list("+-*/")+["=="]:
                for i_5 in list(range(10))+list("+-*/")+["=="]:
                    for i_6 in list(range(10)):
                        try:
                            value = str(i_1)+str(i_2)+str(i_3)+str(i_4)+str(i_5)+str(i_6)
                            if '//' in value:
                                continue
                            valuev = re.sub(r'\b0+(?!\b)', '', value)
                            evaluation = eval(valuev)
                            if type(evaluation) == type(True) and evaluation:
                                data_set_N.append(value)
                        except:
                            continue

import time
start_time = time.time()

if __name__ == '__main__':
    with Manager() as manager:
        data_set_N = manager.list()
        # The iterable is the i_1 list:
        i_1_list = list(range(10))+list("+-")
        POOL_SIZE = min(cpu_count(), len(i_1_list))
        pool = Pool(POOL_SIZE)
        pool.map(partial(process_value, data_set_N), i_1_list)
        pool.close()
        pool.join()
        data_set_N = list(data_set_N)
        print(len(data_set_N))
        print("--- %s seconds ---" % (time.time() - start_time))
This one gives 4687 --- 46.11929202079773 seconds ---
While without multiprocessing:
import re   # needed for re.sub below
import time

start_time = time.time()
data_set_N = []
for i_1 in list(range(10))+list("+-"):
    for i_2 in list(range(10))+list("+-*/")+["=="]:
        for i_3 in list(range(10))+list("+-*/")+["=="]:
            for i_4 in list(range(10))+list("+-*/")+["=="]:
                for i_5 in list(range(10))+list("+-*/")+["=="]:
                    for i_6 in list(range(10)):
                        try:
                            value = str(i_1)+str(i_2)+str(i_3)+str(i_4)+str(i_5)+str(i_6)
                            if '//' in value:
                                continue
                            valuev = re.sub(r'\b0+(?!\b)', '', value)
                            evaluation = eval(valuev)
                            if type(evaluation) == type(True) and evaluation:
                                data_set_N.append(value)
                        except:
                            continue

data_set_N = list(data_set_N)
print(len(data_set_N))
print("--- %s seconds ---" % (time.time() - start_time))
This gives 4687 --- 48.32949733734131 seconds ---, which is almost the same.
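For reference, both of these 6-loop runs enumerate the same number of candidate strings. Again this is just the product of the loop lengths, worked out by hand, not something either script prints:

# Candidate count for the 6-loop test: 12 options for i_1, 15 each for
# i_2..i_5, and 10 for i_6.
print(12 * 15 ** 4 * 10)   # 6_075_000 candidates in roughly 46-48 seconds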
I have 12 CPUs (cpu_count() returns 12), yet there is almost no difference. I don't know why using multiprocessing doesn't make the code run faster.
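In case it helps narrow things down, this is a minimal, self-contained probe I could run to confirm that the pool really spreads work across 12 processes. The probe function and its time.sleep are my stand-ins for process_value, not part of the real code:

from multiprocessing import Pool, cpu_count
import os
import time

def probe(i_1):
    # Stand-in worker: report which process handled this i_1 and how long it took.
    t0 = time.time()
    time.sleep(1)          # simulated work instead of the nested loops
    return i_1, os.getpid(), time.time() - t0

if __name__ == '__main__':
    i_1_list = list(range(10)) + list("+-")
    with Pool(min(cpu_count(), len(i_1_list))) as pool:
        for i_1, pid, took in pool.map(probe, i_1_list):
            print(i_1, pid, "%.2f s" % took)
    # With 12 workers and 12 tasks, this should finish in roughly 1 second
    # and show 12 different pids.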