I have written a very crude hill-climb/greedy algorithm for solving the travelling salesman problem. The aim was to use this as a benchmark and continually improve running time and optimise the solution at the same time.
EDIT: On the suggestion of Stefan Pochmann, this appears to be related to hardware and not the program. On a i7-4790 processor, doing sum(xrange(10**8))
immediately before starting the hill-climb loop will cause run time to more than half for the first hill-climb and improve even further on each loop.
If I run the hill-climb algorithm on the same data 10 times in for
a loop (each hill-climb doing 10,000 iterations), I note that there is almost always a general decline in runtime for solution, with the final solution taking ~50% of the time required for the first solution. The only thing calculated on each solution is the hill-climb itself; supporting data such as the distance/time matrix and job list is stored in memory prior to all solutions. Thus, the first three functions are simply for MCVE and can be ignored.
The printed output is the runtime followed by a list of [(iteration_number, greedy_route_cost)]
i.e. the iteration number at which a new best_route
was selected when searching for a solution. The runtime seems independent of how many interim routes are stored from each run of the hill-climb. Each solution should be independent. Is anyone able to see a reason for this acceleration in calc_route_cost
or hill_climb
? What am I missing here and how would I go about profiling the cause of this acceleration? This is in Python 2.7.9.
import math
import random
import datetime
# To generate a random customer name for each fake order
LETTERS = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q',
'r','s','t','u','v','w','x','y','z']
def crow_flies(lat1, lon1, lat2, lon2):
''' For MCVE. Straight-line distance between two locations. Should not affect
run time.
'''
dx1,dy1 = (lat1/180)*3.141593,(lon1/180)*3.141593
dx2,dy2 = (lat2/180)*3.141593,(lon2/180)*3.141593
dlat,dlon = abs(dx2-dx1),abs(dy2-dy1)
a = (math.sin(dlat/2))**2 + (math.cos(dx1) * math.cos(dx2)
* (math.sin(dlon/2))**2)
c = 2*(math.atan2(math.sqrt(a),math.sqrt(1-a)))
km = 6373 * c
return km
def gen_matrix(order_list):
''' For MCVE. Returns dictionary of distance and time between all
location pairs. Should not affect run time.
Returns {(location_id_from, location_id_to): [distance, time]}
'''
matrix = {}
for loc in order_list:
for pair_loc in order_list:
if loc[0] != pair_loc[0]:
distance = crow_flies(loc[1], loc[2], pair_loc[1], pair_loc[2])
matrix[(loc[0], pair_loc[0])] = [distance, distance*random.random()]
return matrix
def gen_jobs(num_jobs, start_time, end_time, lat_low, lat_high, lon_low, lon_high):
''' For MVCE. Creates random jobs with random time windows and locations
'''
job_list = []
for x in range(num_jobs):
lat = random.uniform(lat_low, lat_high)
lon = random.uniform(lon_low, lon_high)
start = random.randrange(start_time, end_time-120, 1)
end = start + 120
faux_start = random.randrange(start, end-60, 1)
faux_end = faux_start + 60
capacity_demand = random.choice([-1, 1])
name_1 = random.choice(LETTERS)
name_2 = random.choice(LETTERS)
name_3 = random.choice(LETTERS)
name_4 = random.choice(LETTERS)
name_5 = random.choice(LETTERS)
NAME = name_1 + name_2 + name_3 + name_4 + name_5
job_list.append([NAME, lat, lon, start, end, faux_start, faux_end,
capacity_demand])
return job_list
def calc_route_cost(start_time, route, matrix):
''' Returns the cost of each randomly generated route '''
cost = 0
# Mileage cost
dist_cost = sum([matrix[(route[x][0], route[x+1][0])][0] for x in
range(len(route)-1)]) * 0.14
cost += dist_cost
# Man-hour cost
time_cost = sum([matrix[(route[x][0], route[x+1][0])][1] for x in
range(len(route)-1)]) * 0.35
cost += time_cost
for x in range(0, len(route)-1):
travel_time = matrix[(route[x][0], route[x+1][0])][1]
arrival = start_time + travel_time
start_time += travel_time
departure = arrival + 10
if arrival < route[x+1][3]:
# Penalise early arrival
arr_cost = (route[x+1][3] - arrival)**2
cost += arr_cost
elif departure > route[x+1][4]:
# Penalise late departure
dep_cost = (departure - route[x+1][4])**2
cost += dep_cost
if arrival < route[x+1][5]:
# Penalise 'soft' early arrival i.e. earlier than a fake prediction
# of arrival
faux_arr_cost = (route[x+1][5] - arrival)**1.2
cost += faux_arr_cost
elif departure > route[x+1][6]:
# Penalise 'soft' late departure
faux_dep_cost = (departure - route[x+1][6])**1.2
cost += faux_dep_cost
return cost
def hill_climb(jobs, matrix, iterations):
''' Randomly generate routes and store route if cheaper than predecessor '''
cost_tracking, iteration_track = [], []
initial_cost = calc_route_cost(480, jobs, matrix)
best_cost = initial_cost
best_route = jobs[:]
changed_route = jobs[:]
for x in range(iterations):
random.shuffle(changed_route)
new_cost = calc_route_cost(480, changed_route, matrix)
if new_cost < best_cost:
best_route = changed_route[:]
best_cost = new_cost
cost_tracking.append(best_cost)
iteration_track.append(x)
return cost_tracking, iteration_track
if __name__ == '__main__':
#random_jobs = gen_jobs(20, 480, 1080, 24, 25, 54, 55)
random_jobs = [['lmizn', 24.63441343319078, 54.766698677134784, 501, 621, 558, 618, 1],
['jwrmk', 24.45711393348282, 54.255786174435165, 782, 902, 782, 842, 1],
['gbzqc', 24.967074991405035, 54.07326911656665, 682, 802, 687, 747, 1],
['odriz', 24.54161147027789, 54.13774173532877, 562, 682, 607, 667, -1],
['majfj', 24.213785557876257, 54.452603867220475, 681, 801, 731, 791, -1],
['scybg', 24.936517492880274, 54.83786889438055, 645, 765, 662, 722, -1],
['betow', 24.78072704532661, 54.99907581479066, 835, 955, 865, 925, -1],
['jkhmp', 24.88461478479374, 54.42327833917202, 546, 666, 557, 617, -1],
['wbpnq', 24.328080543462, 54.85565694610073, 933, 1053, 961, 1021, -1],
['ezguc', 24.292203133848382, 54.65239508177714, 567, 687, 583, 643, -1],
['nlbgh', 24.111932340385735, 54.895627940055995, 675, 795, 711, 771, -1],
['rtmbc', 24.64381176454049, 54.739636798961044, 870, 990, 910, 970, 1],
['znkah', 24.235361720889216, 54.699010081109854, 627, 747, 645, 705, -1],
['yysai', 24.48931405352803, 54.37480185313546, 870, 990, 882, 942, -1],
['mkmbk', 24.5628992946158, 54.219159859450926, 833, 953, 876, 936, -1],
['wqygy', 24.035376675509728, 54.92994438408514, 693, 813, 704, 764, -1],
['gzwwa', 24.476121543580852, 54.13822533413381, 854, 974, 879, 939, 1],
['xuyov', 24.288078529689894, 54.81812092976614, 933, 1053, 935, 995, 1],
['tulss', 24.841925420359246, 54.08156783033599, 670, 790, 684, 744, -1],
['ptdng', 24.113767467325335, 54.9417036320267, 909, 1029, 941, 1001, 1]]
matrix = gen_matrix(random_jobs)
# Below is the loop that appears to accelerate
for p in range(10):
start = datetime.datetime.now()
sim_ann = hill_climb(random_jobs, matrix, 10000)
end = datetime.datetime.now()
# Number of iterations against greedy cost
iteration_count = zip(sim_ann[1], sim_ann[0])
print 'RUNTIME:', str(end - start)
print 'SOLUTION CONVERGENCE:', str(iteration_count)