I am trying to excute the following code, to save the list data_set_N at the end.
import re
data_set_N=[]
for i_1 in list(range(10))+list("+-"):
for i_2 in list(range(10))+list("+-*/")+["=="]:
for i_3 in list(range(10))+list("+-*/")+["=="]:
for i_4 in list(range(10))+list("+-*/")+["=="]:
for i_5 in list(range(10))+list("+-*/")+["=="]:
for i_6 in list(range(10))+list("+-*/")+["=="]:
for i_7 in list(range(10))+list("+-*/")+["=="]:
for i_8 in list(range(10)):
try :
value= str(i_1)+str(i_2)+str(i_3)+str(i_4)+str(i_5)+str(i_6)+str(i_7)+str(i_8)
valuev=re.sub(r'\b0+(?!\b)', '', value)
evaluation = eval(valuev)
if type(evaluation) == type(True) and evaluation and "//" not in value:
data_set_N.append(value)
except:
continue
print(len(data_set_N))
The problem is that it will take more than 50 hours, for the the first i_1 it took 4.5 hours.
To get data_set_N
faster I wanted to use multiprocessing. The idea was to use something like this :
from multiprocessing import Process, Manager
import itertools
import re
def add_value(data_set_N,paramlist):
#I am not sure if this function is well defined
try
i_1,i_2,i_3,i_4 = paramlist[0],paramlist[1],paramlist[2],paramlist[3]
i_5,i_6,i_7,i_8 = paramlist[4],paramlist[5],paramlist[6],paramlist[7]
value = str(i_1)+str(i_2)+str(i_3)+str(i_4)+str(i_5)+str(i_6)+str(i_7)+str(i_8)
valuev =re.sub(r'\b0+(?!\b)', '', value)
evaluation = eval(valuev)
if type(evaluation) == type(True) and evaluation and "//" not in value:
data_set_N.append(value)
except:
return
data_set_N = []
#Generate values for each parameter
I_1 = list(range(10))+list("+-")
I_2 = list(range(10))+list("+-*/")+["=="]
I_3 = list(range(10))+list("+-*/")+["=="]
I_4 = list(range(10))+list("+-*/")+["=="]
I_5 = list(range(10))+list("+-*/")+["=="]
I_6 = list(range(10))+list("+-*/")+["=="]
I_7 = list(range(10))+list("+-*/")+["=="]
I_8 = list(range(10))
paramlist = list(itertools.product(I_1,I_2,I_3,I_4,I_5,I_6,I_7,I_8))
if __name__ == "__main__":
with Manager() as manager:
data_set_N = manager.list() # <-- can be shared between processes.
processes = []
for i in range(10): #os.cpu_count() - 2 =10 , this range can be changed
p = Process(target=add_value, args=(data_set_N,paramlist)) # Passing the list
p.start()
processes.append(p)
for p in processes:
pool.close()
pool.join()
data_set_N = list(data_set_N) #the final list
The problem here is with paramlist
that causes a MemoryError
(because its size is 12x15^6x10).
Is there a way to use multiprocessing to execute the code faster (around 10 hours), while avoiding memory problems?