thanks to all for your suggestions.
As suggested by @Aya, I believe that to improve performance (and possible space issues) I should avoid to store the results on the HD because storing them adds half of the time than creating the result, so loading and processing it again would get very close to creating the result again. Additionally, if I do not store any result, I save space which can become a big issue for bigger digraphs (a 12 node complete digraphs has about 115 million cycles but a 29 node ones has about 848E27 cycles... and increasing at factorial rate).
The idea is that I first need to find through all cycles going through the weakest arc to find the total probability of all cycles going it. Then, with this total probability I must go again through all those cycles to subtract them from the original array according to the weighted probability (I needed the total probability to be able to calculate the weighted probalility: weighted_prob= prob_of_this_cycle/total_prob_through_this_edge).
Thus, I believe that this is the best approach to do that (but I am open to more discussions! :) ).
However, I have a doubt regarding speed processing regarding two sub-functions:
1st: find whether a sequence contains a specific (smaller) sequence. I am doing that with the function "contains_sequence" which relies on the generator function "window" (as suggested in Is there a Python builtin for determining if an iterable contained a certain sequence? However I have been told that doing it with a deque would be up to 33% faster. Any other ideas?
2nd: I am currently finding the cycle probability of a cycle by sliding through the cycle nodes (which is represented by a list) to find the probability at the output of each arc to stay within the cycle and then multiply them all to find the cycle probability (the function name is find_cycle_probability). Any performance suggestions on this function would be appreciated since I need to run it for each cycle, i.e. countless times.
Any other tips/suggestion/comments will be most welcome! And thanks again for your help.
Aleix
Below follows the simplified code:
def simple_cycles_generator_w_filters(working_array_digraph, arc):
'''Generator function generating all cycles containing a specific arc.'''
generator=new_cycles.simple_cycles_generator(working_array_digraph)
for cycle in generator:
if contains_sequence(cycle, arc):
yield cycle
return
def find_smallest_arc_with_cycle(working_array,working_array_digraph):
'''Find the smallest arc through which at least one cycle flows.
Returns:
- if such arc exist:
smallest_arc_with_cycle = [a,b] where a is the start of arc and b the end
smallest_arc_with_cycle_value = x where x is the weight of the arc
- if such arc does not exist:
smallest_arc_with_cycle = []
smallest_arc_with_cycle_value = 0 '''
smallest_arc_with_cycle = []
smallest_arc_with_cycle_value = 0
sparse_array = []
for i in range(numpy.shape(working_array)[0]):
for j in range(numpy.shape(working_array)[1]):
if working_array[i][j] !=0:
sparse_array.append([i,j,working_array[i][j]])
sorted_array=sorted(sparse_array, key=lambda x: x[2])
for i in range(len(sorted_array)):
smallest_arc=[sorted_array[i][0],sorted_array[i][1]]
generator=simple_cycles_generator_w_filters(working_array_digraph,smallest_arc)
if any(generator):
smallest_arc_with_cycle=smallest_arc
smallest_arc_with_cycle_value=sorted_array[i][2]
break
return smallest_arc_with_cycle,smallest_arc_with_cycle_value
def window(seq, n=2):
"""Returns a sliding window (of width n) over data from the iterable
s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ... """
it = iter(seq)
result = list(itertools.islice(it, n))
if len(result) == n:
yield result
for elem in it:
result = result[1:] + [elem]
yield result
def contains_sequence(all_values, seq):
return any(seq == current_seq for current_seq in window(all_values, len(seq)))
def find_cycle_probability(cycle, working_array, total_outputs):
'''Finds the cycle probability of a given cycle within a given array'''
output_prob_of_each_arc=[]
for i in range(len(cycle)-1):
weight_of_the_arc=working_array[cycle[i]][cycle[i+1]]
output_probability_of_the_arc=float(weight_of_the_arc)/float(total_outputs[cycle[i]])#NOTE:total_outputs is an array, thus the float
output_prob_of_each_arc.append(output_probability_of_the_arc)
circuit_probabilities_of_the_cycle=numpy.prod(output_prob_of_each_arc)
return circuit_probabilities_of_the_cycle
def clean_negligible_values(working_array):
''' Cleans the array by rounding negligible values to 0 according to a
pre-defined threeshold.'''
zero_threeshold=0.000001
for i in range(numpy.shape(working_array)[0]):
for j in range(numpy.shape(working_array)[1]):
if working_array[i][j] == 0:
continue
elif 0 < working_array[i][j] < zero_threeshold:
working_array[i][j] = 0
elif -zero_threeshold <= working_array[i][j] < 0:
working_array[i][j] = 0
elif working_array[i][j] < -zero_threeshold:
sys.exit('Error')
return working_array
original_array= 1000 * numpy.random.random_sample((5, 5))
total_outputs=numpy.sum(original_array,axis=0) + 100 * numpy.random.random_sample(5)
working_array=original_array.__copy__()
straight_array= working_array.__copy__()
cycle_array=numpy.zeros(numpy.shape(working_array))
iteration_counter=0
working_array_digraph=networkx.DiGraph(working_array)
[smallest_arc_with_cycle, smallest_arc_with_cycle_value]= find_smallest_arc_with_cycle(working_array, working_array_digraph)
while smallest_arc_with_cycle: # using implicit true value of a non-empty list
cycle_flows_to_be_subtracted = numpy.zeros(numpy.shape((working_array)))
# FIRST run of the generator to calculate each cycle probability
# note: the cycle generator ONLY provides all cycles going through
# the specified weakest arc
generator = simple_cycles_generator_w_filters(working_array_digraph, smallest_arc_with_cycle)
nexus_total_probs = 0
for cycle in generator:
cycle_prob = find_cycle_probability(cycle, working_array, total_outputs)
nexus_total_probs += cycle_prob
# SECOND run of the generator
# using the nexus_prob_sum calculated before, I can allocate the weight of the
# weakest arc to each cycle going through it
generator = simple_cycles_generator_w_filters(working_array_digraph,smallest_arc_with_cycle)
for cycle in generator:
cycle_prob = find_cycle_probability(cycle, working_array, total_outputs)
allocated_cycle_weight = cycle_prob / nexus_total_probs * smallest_arc_with_cycle_value
# create the array to be substracted
for i in range(len(cycle)-1):
cycle_flows_to_be_subtracted[cycle[i]][cycle[i+1]] += allocated_cycle_weight
working_array = working_array - cycle_flows_to_be_subtracted
clean_negligible_values(working_array)
cycle_array = cycle_array + cycle_flows_to_be_subtracted
straight_array = straight_array - cycle_flows_to_be_subtracted
clean_negligible_values(straight_array)
# find the next weakest arc with cycles.
working_array_digraph=networkx.DiGraph(working_array)
[smallest_arc_with_cycle, smallest_arc_with_cycle_value] = find_smallest_arc_with_cycle(working_array,working_array_digraph)