0

I want to create a python code that get as input a cycling calendar with different cycling races on as list like this :UCI_world_tour_2023=[ ("Santos Tour Down Under", "Adelaide", "Willunga Hill"), ("Cadel Evans Great Ocean Road Race", "Geelong", "Geelong"), ("UAE Tour", "Abu Dhabi", "Abu Dhabi")]. The idea is to rearrange the calendar so that it is more environmentally friendly by minimizing travel between races. The output needs to be the optimal calendar with the least kilometers traveled between the races. I don't have much coding experience and used a lot of Chatgpt to help me. The problem with the code below is the runtime of the script, any tips how i can optimize this script?

from geopy.geocoders import Nominatim
from math import radians, sin, cos, sqrt, atan2
from functools import lru_cache
from multiprocessing import Pool
from itertools import permutations


@lru_cache(maxsize=None)
def geocode(location):
    geolocator = Nominatim(user_agent="geoapiExercises", timeout=10)
    location = geolocator.geocode(location)
    if location is not None:
        return location.latitude, location.longitude
    else:
        return None



def haversine(location1, location2):
    lat1, lon1 = location1
    lat2, lon2 = location2
    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Calculate differences in latitude and longitude
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Calculate distance using Haversine formula
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # Earth's radius in kilometers
    R = 6373.0

    distance = R * c

    return distance


def calculate_distance_matrix(locations_coordinates):
    n = len(locations_coordinates)
    distance_matrix = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i + 1, n):
            distance = haversine(locations_coordinates[i], locations_coordinates[j])
            distance_matrix[i][j] = distance_matrix[j][i] = distance
    return distance_matrix


from itertools import permutations

def calculate_tsp(distance_matrix):
    n = len(distance_matrix)
    min_distance = float('inf')
    optimal_order = None
    for order in permutations(range(n)):
        current_distance = sum(distance_matrix[order[i]][order[i + 1]] for i in range(n - 1))
        if current_distance < min_distance:
            min_distance = current_distance
            optimal_order = order
    return optimal_order, min_distance

def two_opt(distance_matrix, order):
    n = len(order)
    best_distance = calculate_distance(distance_matrix, order)
    improved = True
    while improved:
        improved = False
        for i in range(1, n - 2):
            for j in range(i + 1, n):
                if j - i == 1: continue
                new_order = order[:i] + order[i:j][::-1] + order[j:]
                new_distance = calculate_distance(distance_matrix, new_order)
                if new_distance < best_distance:
                    order = new_order
                    best_distance = new_distance
                    improved = True
                    break
            if improved: break
    return order, best_distance

def calculate_distance(distance_matrix, order):
    n = len(order)
    distance = 0
    for i in range(n - 1):
        distance += distance_matrix[order[i]][order[i + 1]]
    distance += distance_matrix[order[-1]][order[0]]
    return distance

def calculate_tsp_optimized(distance_matrix):
    order, min_distance = calculate_tsp(distance_matrix)
    order, min_distance = two_opt(distance_matrix, order)
    return order, min_distance



def optimal_calendar(calendar):
    # Extract locations from calendar and convert them to latitude and longitude coordinates
    locations = [race[1] for race in calendar]
    with Pool() as pool:
        locations_coordinates = pool.map(geocode, locations)

    # Calculate distances between all locations using the Haversine formula
    distance_matrix = calculate_distance_matrix(locations_coordinates)

    # Calculate total distance for original calendar
    original_distance = sum(distance_matrix[i][i + 1] for i in range(len(locations_coordinates) - 1))

    # Find optimal order using 2-Opt algorithm (Traveling Salesman Problem)
    optimal_order, min_distance = calculate_tsp(distance_matrix)

    # Rearrange calendar according to optimal order
    optimal_calendar = [calendar[i] for i in optimal_order]

    return optimal_calendar, min_distance, original_distance


if __name__ == '__main__':
    races =     [("Santos Tour Down Under", "Adelaide", "Willunga Hill"),
    ("UAE Tour", "Abu Dhabi", "Abu Dhabi"),
    ("Omloop Het Nieuwsblad Elite", "Gent", "Ninove"),
    ("Strade Bianche", "Siena", "Siena"),
    ("Paris-Nice", "Paris", "Nice"),
    ("Tirreno-Adriatico", "Lido di Camaiore", "San Benedetto del Tronto"),
    ("Milan-Sanremo", "Milan", "San Remo"),
    ("Volta Ciclista a Catalunya", "Calella","Barcelona"),
    ("E3 Saxo Bank Classic","Harelbeke","Harelbeke"),
    ("Gent-Wevelgem in Flanders Fields","Ypres","Wevelgem"),
    ("Ronde van Vlaanderen - Tour des Flandres","Antwerpen","Oudenaarde"),
    ("Itzulia Basque Country","Bilbao","Arrate (Eibar)"),
    ("Paris-Roubaix","Compiègne","Roubaix Velodrome"),
    ("Amstel Gold Race","Maastricht","Berg en Terblijt"),
    ("La Flèche Wallonne","Charleroi","Mur de Huy"),
    ("Liège-Bastogne-Liège","Liège Place Saint-Lambert (Liège)","Liège"),
    ("Tour de Romandie","Oron-la-Ville (Oron)","Fribourg"),
    ("Giro d'Italia ","Turin ","Milan "),
    ("Critérium du Dauphiné ","Issoire ","Les Gets "),
    ("Tour de Suisse","Frauenfeld","Andermatt"),
    ("Tour de France","Copenhagen","Paris Champs-Élysées"),
    ("Clasica Ciclista San Sebastian","San Sebastián","San Sebastián"),
    ("Tour de Pologne","Kraków","Kraków"),
    ("EuroEyes Cyclassics Hamburg","Hamburg","Hamburg"),
    ("BinckBank Tour","Blankenberge (Belgium)","Geraardsbergen (Belgium)"),
    ("La Vuelta ciclista a España ","Burgos ","Santiago de Compostela "),
    ("Il Lombardia ","Como ","Bergamo "),
    ("Tour of Guangxi ","Beihai ","Guilin ")]
    optimal_locations, optimal_distance, original_distance = optimal_calendar(races)

    print("Optimal race order:")
    for race in optimal_locations:
        print("- " + " - ".join(race))
    print("Optimal distance: {:.2f} km".format(optimal_distance))
  • Google's ORtools has a good TSP solver, see https://developers.google.com/optimization/routing/tsp for some guides on using it. You can expect to get the optimal answer for a few thousand cities reasonably easily using it – Sam Mason Mar 08 '23 at 13:04
  • Thanks for the help! The google's ORtools provided a solution. – Vic Vanwynsberghe Mar 09 '23 at 08:38

1 Answers1

0

It looks like your algorithm is O(N!) ... which is as expected for an naive algorithm that finds an exact solution.

The problem here is that TSP is NP-complete which means that algorithms for finding an exact solution are suprapolynomial. There are better algorithms than yours, but they are difficult / complex ... and still suprapolynomial for large enough N.

There are also more practical algorithms that give heuristic and approximate; i.e. not necessarily optimal.

My advice:

  • If you are doing this to improve your Pythons skills or optimization, pick another problem.
  • If you are doing this because you have a practical need for a good but suboptimal solution for large N, find an existing Python implementation of a heuristic / approximate algorithm.
  • If you are doing this because you have a practical need for an optimal solution for large N, forget it!
  • If you are just doing this out of curiosity, read the Wikipedia page below ... and then decide for yourself if should curb your curiosity or direct it elsewhere.

References:

Stephen C
  • 698,415
  • 94
  • 811
  • 1,216