I am using the multiprocessing module in Python to run a function in parallel.
The function's name is:
Parallel_Solution_Combination_Method(subset, i):
The subset parameter is a list element consisting of a tuple of chromosomes.
chromosome is a class that I defined within the same script. I am running on Lubuntu, a Linux-based OS. The code that I'm using to try to run the function in parallel is:
# Two worker processes; one asynchronous task is submitted per subset, and
# each task is handed the subset together with its position in ``subsets``.
pool = mp.Pool(processes=2)
results = [
    pool.apply_async(Parallel_Solution_Combination_Method, args=(subset, i))
    for i, subset in enumerate(subsets)
]
However, the problem that I'm encountering is that whenever I specify more than one process, the results are not as expected. Let's say I'm passing a list of subsets of size 10 and I'm using:
processes=2
then the first two outputs have exactly the same values, outputs 3 and 4 are the same, and so on, whereas if I specify the number of processes as:
processes = 1
which is essentially a sequential run, then the outcome is correct, as expected (the same as a normal for loop without multiprocessing).
I don't know why my results are getting mixed up even though I'm explicitly sending a different tuple from the set, selected by the index i of the for loop:
args=(subsets[i],i,)
I am running on hardware with two cores, so I was hoping that I could run two instances of the function in parallel, but the outcome is that it produces duplicate results. I can't figure out what I'm doing wrong. Please help! Thank you.
def _crossover_alignment(parent1, parent2):
    """Return a child alignment row built by gap crossover of two parent rows.

    The gap positions ('-') of both parents are located with
    ``find_allIndices``; ``find_different_indicies`` then yields the candidate
    crossover points, one of which is picked at random.  The child takes the
    first parent's row up to the chosen gap and is then extended, position by
    position, with either a gap (where the second parent's remaining gaps
    fall) or the next unused residue of the second parent.

    If either parent has no gaps, or there are no candidate crossover points,
    ``parent1`` is returned unchanged.
    """
    child = parent1
    gaps1 = find_allIndices(parent1, '-')
    gaps2 = find_allIndices(parent2, '-')
    if len(gaps1) == 0 or len(gaps2) == 0:
        return child

    differing = find_different_indicies(gaps1, gaps2)
    if len(differing) == 0:
        return child

    slice_point = random.choice(differing)
    if gaps2[slice_point - 1] < gaps1[slice_point]:
        # Swap the parents together with their gap indices so that the gaps
        # do not collide with each other during the crossover.
        parent1, parent2 = parent2, parent1
        gaps1, gaps2 = gaps2, gaps1

    # The first part of the child runs up to, but not including, the crossing
    # point, so the child differs from both parents by at least one point.
    child_gaps = gaps1[:slice_point] + gaps2[slice_point:]
    tail_gaps = child_gaps[slice_point:]

    if slice_point == 0:
        return parent2

    child = parent1[:gaps1[slice_point]]
    # Residues (non-gap characters) already consumed by the head of the child
    # are skipped in the second parent; the rest are appended one at a time.
    head_residues = str(child).replace('-', '')
    tail_residues = str(parent2).replace('-', '')[len(head_residues):]
    for position in range(gaps1[slice_point - 1], len(parent1)):
        if position in tail_gaps:
            child = child + '-'
        else:
            child = child + tail_residues[0:1]
            tail_residues = tail_residues[1:]
    return child


def Parallel_Solution_Combination_Method(subset, counter):
    """Combine the two chromosomes in ``subset`` into a scored child chromosome.

    Parameters:
        subset:  indexable pair of chromosomes; ``subset[0]`` and ``subset[1]``
                 supply ``record_template`` and ``record_target``.
        counter: integer added to the model offset (300) to form the model
                 number passed to ``start_model`` and used in the PDB file name.

    Returns:
        A new ``chromosome`` whose template/target records are the crossover
        results and whose ``molpdf_score`` and ``normalized_dope_score`` are
        filled in from the generated model.

    The crossover is retried (up to 100 extra attempts) while the child is
    identical to an entry already present in ``Reference_Set``.

    NOTE(review): the nesting of this function was reconstructed from a paste
    that had lost its indentation -- confirm against the original script.
    """
    print('entered parallel sol comb')
    child_chromosome = chromosome()
    combination_model_offset = 300
    max_attempts = 100
    attempts = 0
    while True:
        template_child = _crossover_alignment(subset[0].record_template,
                                              subset[1].record_template)
        target_child = _crossover_alignment(subset[0].record_target,
                                            subset[1].record_target)
        is_duplicate = any(
            y.record_template == template_child and y.record_target == target_child
            for y in Reference_Set
        )
        # Accept a child that is new to the reference set, or give up once
        # the attempt budget is exhausted.  (The previous test,
        # ``attempts <= 100``, was true on the very first pass, so the
        # duplicate check never triggered a retry.)
        if not is_duplicate or attempts >= max_attempts:
            break
        attempts += 1

    child_chromosome.record_template = template_child
    child_chromosome.record_target = target_child

    model_number = combination_model_offset + counter
    # NOTE(review): every call passes the same fixed file name
    # 'PIR_input.ali' to start_model.  If generate_PIR writes that file (its
    # body is not visible here -- confirm), concurrent worker processes can
    # overwrite each other's alignment before the model is built, which would
    # make parallel runs return identical scores.  A per-task file name would
    # avoid that.
    generate_PIR(template_header, template_description, child_chromosome.record_template,
                 target_header, target_description, child_chromosome.record_target)
    output_values = start_model(template_id, target_id, 'PIR_input.ali', model_number)
    child_chromosome.molpdf_score = output_values['molpdf']

    mdl = complete_pdb(env, '1BBH.B99990' + str(model_number) + '.pdb')
    child_chromosome.normalized_dope_score = mdl.assess_normalized_dope()
    return child_chromosome
This is the code for Parallel_Solution_Combination_Method. In case it is helpful, I'm also including the chromosome class that I defined:
class chromosome():
    """Basic solution representation that holds alignments and their evaluations."""

    def __init__(self):
        """Create an empty chromosome with blank records and zeroed scores."""
        self.record_template = ''
        self.record_target = ''
        self.molpdf_score = 0.0
        self.ga341_score = 0.0
        self.dope_score = 0.0
        self.normalized_dope_score = 0.0
        self.flag_value = 0
        self.distance_value = 0
        # Initialised here so that instances which never call add_records()
        # still expose the attribute instead of raising AttributeError.
        self.seq_records = []

    def add_molpdf(self, molpdf):
        """Store the molpdf score."""
        self.molpdf_score = molpdf

    def add_ga341(self, ga341):
        """Store the GA341 score."""
        self.ga341_score = ga341

    def add_dope(self, dope):
        """Store the DOPE score."""
        self.dope_score = dope

    def add_normalized_dope(self, normalized_dope):
        """Store the normalized DOPE score."""
        self.normalized_dope_score = normalized_dope

    def add_records(self, records):
        """Store ``records`` and pick out the template and target sequences.

        Each record's ``id`` is compared with the module-level ``template_id``
        and ``target_id``; the ``seq`` of a matching record becomes
        ``record_template`` or ``record_target`` respectively.  If several
        records share an id, the last one wins.
        """
        self.seq_records = records
        for rec in self.seq_records:
            if rec.id == template_id:
                self.record_template = rec.seq
            elif rec.id == target_id:
                self.record_target = rec.seq

    def set_flag(self, flag):
        """Store the flag value."""
        self.flag_value = flag

    def add_distance(self, distance):
        """Store the distance value."""
        self.distance_value = distance
Please note that all of this is within the same Python script.