- You won't be able to read the file directly with pandas
- You will need to download it
- Incidentally, the function below will work for downloading any file, provided you give it the direct (raw) URL of that file.
- The function requires the URL and the full local path (directory plus file name) to save the file to.
import json
from pathlib import Path

import pandas as pd
import requests
def create_dir_save_file(dir_path: Path, url: str) -> None:
    """
    Ensure the target directory exists and download the file if it is missing.

    Despite its name, ``dir_path`` is the full path of the file to create;
    its parent is the directory that is created when absent.

    Args:
        dir_path: Local path (directory plus file name) to save the download to.
        url: Direct URL of the file to download.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: If the request fails or times out.
    """
    target_dir = dir_path.parent
    if not target_dir.exists():  # if directory doesn't exist
        target_dir.mkdir(parents=True, exist_ok=True)  # create directory
        print(f'Directory Created: {target_dir}')
    else:
        print('Directory Exists')

    if dir_path.exists():  # nothing to download
        print('File Exists')
        return

    # A timeout keeps an unresponsive server from hanging the call forever.
    response = requests.get(url, allow_redirects=True, timeout=30)
    # Fail loudly rather than saving an HTTP error page as if it were the data.
    response.raise_for_status()
    dir_path.write_bytes(response.content)  # opens, writes and closes the file
    print(f'File Created: {dir_path.name}')
data_dir = Path.cwd()  # current working dir, or use Path('e:/some_path') to specify a location
url = 'https://raw.githubusercontent.com/jackiekazil/data-wrangling/master/data/chp3/data-text.json'
file_name = url.split('/')[-1]
data_path = data_dir / file_name  # local path to data once downloaded
create_dir_save_file(data_path, url)  # call function to download file

# json_normalize expects parsed JSON (a dict or a list of dicts), not a file
# path, so load the downloaded file first.
with data_path.open(encoding='utf-8') as json_file:
    records = json.load(json_file)
df = pd.json_normalize(records)  # create dataframe
# display(df.head())
Indicator PUBLISH STATES Year WHO region World Bank income group Country Sex Display Value Numeric Low High Comments
0 Life expectancy at birth (years) Published 1990 Europe High-income Andorra Both sexes 77 77.0
1 Life expectancy at birth (years) Published 2000 Europe High-income Andorra Both sexes 80 80.0
2 Life expectancy at age 60 (years) Published 2012 Europe High-income Andorra Female 28 28.0
3 Life expectancy at age 60 (years) Published 2000 Europe High-income Andorra Both sexes 23 23.0
4 Life expectancy at birth (years) Published 2012 Eastern Mediterranean High-income United Arab Emirates Female 78 78.0