I am writing a program that generates several points within several zip codes and finds the distance from each of those points to every point in every zip code that is within 5 miles of the zip code of interest. It does this by calling the Google Maps Distance Matrix service and capturing the distance data. I wrote a lengthy function that does several things (I think it needs to be broken up further), and this is where I think the problem is.
Everything works as it should when run normally. However, after I build an executable with PyInstaller, I receive several errors when the executable first loads and again when I run the main function of my program. The errors seem to be centered around pyproj and geopandas.
I have seen this problem in a few other places. I was not able to successfully apply the solutions discussed in those places. The solutions that were presented consisted of:
downgrade pyproj to 1.9.6 - see error below
add a hook file in pyinstaller - there was a hook file already in the directory
include pyproj._datadir and pyproj.datadir in the hidden imports section of the created spec file.
use os.environ['PROJ_LIB'] and set it to the share folder found at "/share
Packages I am using:
import pandas as pd
import tkinter as tk
import tkinter.filedialog
import os
import geopandas as gpd
from shapely.geometry import Point,LineString
import shapely.wkt
import googlemaps
from googlemaps.exceptions import ApiError
import datetime
from statistics import median
import _thread
Spec file:
# -*- mode: python -*-
# PyInstaller build specification for main.py.
# Analysis, PYZ, EXE and COLLECT are not imported here; PyInstaller supplies
# them when it executes the spec file.

block_cipher = None

# Modules listed explicitly so that PyInstaller bundles them.
hidden_modules = [
    'fiona._shim',
    'fiona.schema',
    'pyproj._datadir',
    'pyproj.datadir',
]

# Name shared by the executable and the collected output folder.
app_name = 'main'

a = Analysis(
    ['main.py'],
    pathex=['C:\\Users\\Keagan\\PycharmProjects\\upwork_jobs\\pet_sitting2\\gui'],
    binaries=[],
    datas=[],
    hiddenimports=hidden_modules,
    hookspath=[],
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(
    a.pure,
    a.zipped_data,
    cipher=block_cipher,
)

# exclude_binaries=True leaves binaries out of the EXE; they are passed to
# COLLECT below instead.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name=app_name,
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    name=app_name,
)
Main function:
# Query the Google Maps Distance Matrix service for pairs of points and return
# the collected results as a pandas DataFrame.
#
# Parameters (as used by the code below):
#   reference_names_list -- iterable of names; the outer loop runs once per name.
#   reference_zips_df    -- rows are read through .name and .zip.
#   zips_and_points_gdf  -- rows are read through .zip_left, .name_left,
#                           .geometry and .zip_center_point (parsed as WKT).
#   api_key              -- key handed to googlemaps.Client.
# Returns: a DataFrame built from the list of per-pair result dicts.
#
# NOTE(review): the indentation of this listing has been lost, so the nesting
# of the statements below cannot be confirmed from the text alone.
def model_distances(self, reference_names_list, reference_zips_df,zips_and_points_gdf,api_key):
gmaps = googlemaps.Client(api_key)
# Return True when some record already links these two point ids, in either
# direction (poi -> to_location or to_location -> poi).
def find_key_value_connection(poi, to_location, list_of_dicts):
for item in list_of_dicts:
if poi == item["poi"] and to_location == item["to_location"] or to_location == item[
"poi"] and poi == \
item["to_location"]:
return True
return False
# Reproject a geometry between two CRS identifiers such as "epsg:26910".
# NOTE(review): partial, pyproj and transform are not in the import list shown
# with this listing; presumably they are imported elsewhere in main.py.
def projection(origin_projection, to_projection, geometry_object):
project = partial(
pyproj.transform,
pyproj.Proj(init=origin_projection),
pyproj.Proj(init=to_projection)
)
return transform(project, geometry_object)
# Working state shared across all names.
zip_code_intersect_list = []
# Scratch dict that is refilled for every record; a copy is appended each time.
completed_locations_dict = {}
completed_locations_list = []
# count: new records since the last workbook save.
count = 0
# google_credit_count: API requests issued since the last reset.
google_credit_count = 0
completed_locations_df = None
buffer_list = []
for name in reference_names_list:
print("we are on: {}".format(name))
# Resume from a saved workbook for this name, but only while no DataFrame has
# been loaded or created yet.
if os.path.isfile("output_files/completed_locations_{}.xlsx".format(name)) and completed_locations_df is None:
print("found backup, opening it")
completed_locations_df = pd.read_excel("output_files/completed_locations_{}.xlsx".format(name))
for item in completed_locations_df.itertuples():
completed_locations_dict["poi"] = int(item.poi)
completed_locations_dict["to_location"] = int(item.to_location)
completed_locations_dict["poi_zip"] = item.poi_zip
completed_locations_dict["to_zip"] = item.to_zip
completed_locations_dict["poi_name"] = item.poi_name
completed_locations_dict["to_name"] = item.to_name
completed_locations_dict["id"] = item.id
completed_locations_dict["distance"] = float(item.distance)
completed_locations_dict["time"] = float(item.time)
completed_locations_list.append(completed_locations_dict.copy())
elif not os.path.isfile("output_files/completed_locations_{}.xlsx".format(name)):
print("creating a backup")
completed_locations_df = pd.DataFrame()
# NOTE(review): this writes to the current directory, whereas the existence
# checks and every later save use the output_files/ folder; the prefix looks
# like it is missing here.
completed_locations_df.to_excel("completed_locations_{}.xlsx".format(name))
# NOTE(review): the loop variable `zip` shadows the builtin of the same name.
for zip in reference_zips_df.itertuples():
if zip.name == name:
print("we are in zipcode: {}".format(zip.zip))
for poi in zips_and_points_gdf.itertuples():
# Compared as strings, so differing zip-code dtypes still match.
if str(poi.zip_left) == str(zip.zip):
buffer = ""
poi_zip = ""
# NOTE(review): poi_zip was set to "" on the line above, so this condition
# presumably holds for every point (unless zip_left is itself "").
if poi_zip == None or poi.zip_left != poi_zip:
poi_zip = poi.zip_left
# 8046 is presumably 5 miles expressed in metres -- TODO confirm the units of
# the CRS that zip_center_point is stored in.
buffer = shapely.wkt.loads(poi.zip_center_point).buffer(8046)
buffer_list.append(buffer)
# First pass: collect the zip codes of points in other zip codes whose geometry
# intersects the buffer.
for to_location in zips_and_points_gdf.itertuples():
if poi.zip_left != to_location.zip_left and to_location.geometry.intersects(
buffer) and to_location.zip_left not in zip_code_intersect_list:
zip_code_intersect_list.append(to_location.zip_left)
# Second pass: visit every point in those zip codes that carries the current
# name.
for to_location in zips_and_points_gdf.itertuples():
if to_location.zip_left in zip_code_intersect_list and to_location.name_left == name:
# Skip pairs already recorded in either direction.
if find_key_value_connection(int(poi.Index), int(to_location.Index),
completed_locations_list):
print(
"point at index {} was already calculated to point at index {}, google credit at: {}".format(
poi.Index, to_location.Index, google_credit_count))
else:
google_credit_count += 1
count += 1
print(
"calculating point at index {} to index {}, google credit at: {}".format(
poi.Index, to_location.Index, google_credit_count))
# Reproject both points from epsg:26910 to epsg:4326 and pass them to the API
# as (y, x) tuples.
new_poi = projection("epsg:26910", "epsg:4326", poi.geometry)
new_to_location = projection("epsg:26910", "epsg:4326", to_location.geometry)
result = gmaps.distance_matrix((new_poi.y, new_poi.x),
(new_to_location.y,new_to_location.x))
completed_locations_dict["poi"] = int(poi.Index)
completed_locations_dict["to_location"] = int(to_location.Index)
completed_locations_dict["poi_zip"] = poi.zip_left
completed_locations_dict["to_zip"] = to_location.zip_left
completed_locations_dict["poi_name"] = zip.name
completed_locations_dict["to_name"] = to_location.name_left
# id is "<poi zip><poi index>-<to zip><to index>".
completed_locations_dict["id"] = str(poi.zip_left) + str(
poi.Index) + "-" + str(to_location.zip_left) + str(to_location.Index)
# Presumably duration is in seconds (/ 60 -> minutes) and distance in metres
# (/ 1609.3 -> miles) -- TODO confirm against the API response format.
try:
completed_locations_dict["time"] = \
result["rows"][0]["elements"][0]["duration"]["value"] / 60
completed_locations_dict["distance"] = \
result["rows"][0]["elements"][0]["distance"]["value"] / 1609.3
except KeyError:
# A response element without duration/distance is stored as the string "nan".
completed_locations_dict["time"] = "nan"
completed_locations_dict["distance"] = "nan"
completed_locations_list.append(completed_locations_dict.copy())
# Rewrite the workbook once more than 500 new records have accumulated.
if count > 500:
print("backup exists appending new df to backup")
completed_locations_df = pd.DataFrame(completed_locations_list)
completed_locations_df.to_excel("output_files/completed_locations_{}.xlsx".format(name))
count = 0
# After 10000 requests, ask the user whether to continue, switch API key, or
# quit.
if google_credit_count >= 10000:
continue_program = input(
"desired google credit has hit $50, continue or change keys?(continue/change/quit): ")
while continue_program != "continue":
if continue_program == "quit":
# with open("backup_save.json", "w") as backup_file:
# json.dump(completed_locations_list.copy(), backup_file)
completed_locations_df = pd.DataFrame(completed_locations_list)
completed_locations_df.to_excel("output_files/completed_locations_{}.xlsx".format(name))
print("saving to excel")
# NOTE(review): quit() is installed by the site module and may be unavailable
# in a frozen executable; sys.exit() would be the safer call -- TODO confirm.
quit()
new_key = input("please insert a new key: ")
gmaps = googlemaps.Client(key=new_key)
# NOTE(review): the validation request is commented out, so the only live
# statement in this try body is input(); the except branch looks unreachable,
# and the key it reads is never used to rebuild the client.
try:
# res = gmaps.geocode("Austin, Texas")
continue_program = input("valid key, continue? (continue/quit): ")
except (ValueError, ApiError):
new_key = input("invalid key, try again: ")
google_credit_count = 0
# Clear the collected zip codes so they can be rebuilt (presumably once per
# point of interest -- nesting not recoverable from this listing).
zip_code_intersect_list = []
completed_locations_df = pd.DataFrame(completed_locations_list)
return completed_locations_df
When trying to downgrade pyproj to 1.9.6:
UnsatisfiableError: The following specifications were found to be incompatible with each other:
When first starting up the tool:
Warning:
The MATPLOTLIBDATA environment variable was deprecated in Matplotlib 3.1 and will be removed in 3.3.
exec(bytecode, module.__dict__)
Traceback (most recent call last):
File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
Exception ignored in: 'pyproj._datadir.get_pyproj_context'
Traceback (most recent call last):
File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
proj_create: Cannot find proj.db
proj_create: init=epsg:/init=IGNF: syntax not supported in non-PROJ4 emulation mode
Invalid projection: +init=epsg:4326 +type=crs
When running the tool:
<code that runs fine before>
Traceback (most recent call last):
File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
Exception ignored in: 'pyproj._datadir.get_pyproj_context'
Traceback (most recent call last):
File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
proj_create: Cannot find proj.db
proj_create: init=epsg:/init=IGNF: syntax not supported in non-PROJ4 emulation mode
Unhandled exception in thread started by <bound method ZipAnalysisGUI.analyze_data of <__main__.ZipAnalysisGUI object at 0x000001DAAD51A668>>
Traceback (most recent call last):
File "main.py", line 480, in analyze_data
File "main.py", line 237, in model_distances
File "main.py", line 157, in projection
File "site-packages\pyproj\proj.py", line 147, in __init__
File "site-packages\pyproj\crs.py", line 391, in from_user_input
File "site-packages\pyproj\crs.py", line 260, in __init__
File "pyproj/_crs.pyx", line 1292, in pyproj._crs._CRS.__init__
pyproj.exceptions.CRSError: Invalid projection: +init=epsg:26910 +type=crs
Based on the error above, this is where I think it is getting caught up:
def projection(origin_projection, to_projection, geometry_object):
    """Reproject a geometry from one coordinate reference system to another.

    Args:
        origin_projection: CRS identifier of the input geometry, passed as
            the ``init`` argument of ``pyproj.Proj``.
        to_projection: CRS identifier of the output geometry, passed the
            same way.
        geometry_object: Geometry handed to ``transform`` (not defined in
            this snippet; presumably ``shapely.ops.transform``).

    Returns:
        The result of ``transform`` applied to ``geometry_object``.
    """
    convert = pyproj.transform
    source_crs = pyproj.Proj(init=origin_projection)
    target_crs = pyproj.Proj(init=to_projection)

    def reproject(*coordinates, **options):
        # Forward the coordinates with both projections bound first.
        return convert(source_crs, target_crs, *coordinates, **options)

    return transform(reproject, geometry_object)
Again, it all works well when run from PyCharm. Once I try to run it as an executable, it begins to fall apart. I am fairly certain the failure is in the function above, but I am unable to determine why. I can share more code or the entire file if needed.