I am using the following code to download and extract a ~250 MB zip file:
from pathlib import Path
import os
import requests
from zipfile import ZipFile
import traceback
import sys
# Download the password-protected UCR archive into ./data (skipped when the zip
# is already present) and unpack it into the same directory.
data_path = os.path.join(".", "data")
Path(data_path).mkdir(parents=True, exist_ok=True)  # assuming Python >= 3.5
url_str = 'https://www.cs.ucr.edu/~eamonn/time_series_data_2018/UCRArchive_2018.zip'
zip_out_path = os.path.join(data_path, "UCRArchive_2018.zip")

if not os.path.exists(zip_out_path):
    # Stream into a ".part" file and rename only on success. Otherwise an
    # interrupted download would leave a truncated zip behind that the
    # os.path.exists() check above would treat as complete on the next run.
    partial_path = zip_out_path + ".part"
    try:
        print("starting to download dataset zip")
        # NOTE(review): verify=False disables TLS certificate validation, so
        # the downloaded archive is not authenticated. Kept as in the original
        # in case the server's certificate chain requires it -- confirm and
        # remove if the download works without it.
        with requests.get(url_str, timeout=10, verify=False, stream=True) as response:
            # Fail on 4xx/5xx instead of saving an HTML error page as the zip.
            response.raise_for_status()
            with open(partial_path, 'wb') as out_file:
                # 1 MiB chunks keep memory use flat instead of holding the
                # whole ~250 MB body in RAM at once.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    out_file.write(chunk)
    except requests.RequestException:
        # print_exc() takes no exception argument (its first parameter is
        # `limit`); it prints the exception currently being handled.
        traceback.print_exc()
        print(r"Auto download failed. For manual download see ./zip_password.txt", file=sys.stderr)
        raise
    else:
        print("downloaded dataset zip. Takes ~1min")
        print("writing zip.")
        os.replace(partial_path, zip_out_path)
    finally:
        # After a successful os.replace() the partial file no longer exists,
        # so this only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# The standard-library zipfile module decrypts password-protected members in
# pure Python, which is why this step is much slower than a native archive
# tool. Speeding it up requires an external extractor, not a change here.
print("extracting zip. Takes several minutes, manual is fast.")
with ZipFile(zip_out_path, 'r') as zip_ref:
    zip_ref.extractall(data_path, pwd=b"someone")
print("done extracting.")
The file gets downloaded, but the extraction is extremely slow.
When manually extracting it (double click, extract), everything works fine.
What is happening, and how can I fix this?