How could osmnx be used in an HPC cluster?
I want to run this Python script on an HPC cluster running Ubuntu Server in order to speed up its execution time.
(Currently the process takes about 4 minutes to finish on a single server without a cluster.)
Attached Python code:
import sys
import networkx as nx
import osmnx as ox
import geopandas as gpd
import math
from shapely.geometry import Point
import json
# Switch OSMnx's cache setting off for this run.
# NOTE(review): the script only reads a local .graphml file, so this setting
# presumably has no effect on the run time -- confirm before relying on it.
ox.config(use_cache=False)

# The script needs four positional arguments: origin latitude/longitude and
# destination latitude/longitude. Fail with a usage message instead of an
# IndexError traceback when some of them are missing.
if len(sys.argv) < 5:
    sys.exit("usage: " + sys.argv[0] + " ORIG_LAT ORIG_LNG DEST_LAT DEST_LNG")

# float() still raises ValueError for a non-numeric argument, as before.
origlatTmp, origlngTmp, destlatTmp, destlngTmp = (
    float(arg) for arg in sys.argv[1:5]
)
def fromAtoBpoints(origlat, origlng, destlat, destlng, graph=None):
    """Find the shortest driving route (by length) between two coordinates.

    The origin and destination are given as latitude/longitude pairs in
    EPSG:4326. Each one is snapped to its nearest graph node and the
    length-weighted shortest path between the two nodes is computed.

    The default graph file ``/var/www/html/jalisco.graphml`` was generated
    with::

        import osmnx as ox
        ox.config(use_cache=True, log_console=True)
        G = ox.graph_from_place('Jalisco,Mexico', network_type='drive',
                                simplify=False)
        G = ox.add_edge_speeds(G)
        G = ox.add_edge_travel_times(G)
        ox.save_graphml(G, '/var/www/html/jalisco.graphml')

    Jalisco is a state of Mexico with a territorial extension of 78,588 km².

    Args:
        origlat: Latitude of the origin point.
        origlng: Longitude of the origin point.
        destlat: Latitude of the destination point.
        destlng: Longitude of the destination point.
        graph: Optional graph that is already loaded. When omitted, the
            Jalisco graph file is read from disk on every call, so a caller
            that routes several pairs can load the graph once and pass it in.

    Returns:
        A 3-tuple ``(distance_km, "@@@", waypoints)`` where ``distance_km``
        is the route length divided by 1000 and ``waypoints`` is a list of
        ``"{lat:<y>,lng:<x>}"`` strings, one per node along the route.
    """
    if graph is None:
        graph = ox.load_graphml('/var/www/html/jalisco.graphml')

    # Build both endpoints as (x=lng, y=lat) points in EPSG:4326, then
    # reproject them into the CRS recorded on the graph before snapping.
    endpoints = gpd.GeoSeries(
        [Point((origlng, origlat)), Point((destlng, destlat))],
        crs='epsg:4326',
    )
    endpoints_proj = endpoints.to_crs(graph.graph['crs'])
    orig_node, dest_node = (
        ox.distance.nearest_nodes(graph, pt.x, pt.y) for pt in endpoints_proj
    )

    # A single search returns both the route and its total length. The
    # previous version ran three independent searches over the same node
    # pair, one of which (weighted by 'travel_time') was never used.
    distance, route = nx.bidirectional_dijkstra(
        graph, orig_node, dest_node, weight='length'
    )

    # Serialise each node on the route using the node's 'y' as lat and
    # 'x' as lng; the string format is what the script prints.
    waypoints = [
        "{lat:" + str(graph.nodes[node]['y'])
        + ",lng:" + str(graph.nodes[node]['x']) + "}"
        for node in route
    ]
    return (distance / 1000), "@@@", waypoints
# Route between the coordinates taken from the command line and print the
# resulting (distance_km, "@@@", waypoints) tuple to stdout.
route_result = fromAtoBpoints(origlatTmp, origlngTmp, destlatTmp, destlngTmp)
print(route_result)
This code takes a long time to execute (about 4 minutes, as mentioned above). Our requirement is that it must be able to work offline, and the solution that has been proposed to improve the response time is to run it on an HPC cluster.