1

I am trying to download multiple netcdf4 files from GES DISC, but I seem to be having trouble with the Authorization.

'fpath' is the location of the netcdf4 file. If I paste it into the browser's address bar, a pop-up box appears for 'https://urs.earthdata.nasa.gov' asking for a username and password. If these are entered successfully, the file downloads. However, using 'fpath' in requests.get() does not work.

requests.get() successfully connects if I use 'https://urs.earthdata.nasa.gov' instead of fpath, but then I cannot download the netcdf4 file.

I've tried solution mentioned here but no luck.

Any help would be appreciated.

Code example below

import requests
from requests.auth import HTTPBasicAuth
from datetime import timedelta, date


def daterange(start_date, end_date):
    """Yield consecutive days from start_date up to, but not including, end_date.

    Nothing is yielded when end_date is on or before start_date.
    """
    total_days = int((end_date - start_date).days)
    day_offset = 0
    while day_offset < total_days:
        yield start_date + timedelta(day_offset)
        day_offset += 1


# Half-open date window: daterange() stops before end_date.
start_date, end_date = date(2016, 1, 1), date(2016, 1, 2)

for single_date in daterange(start_date, end_date):
    # Zero-padded year / month / day strings used to build the request URL.
    YYYY, MM, DD = (single_date.strftime(code) for code in ("%Y", "%m", "%d"))
    fpath1 = ''.join(['https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/', YYYY, '/', MM, '/'])
    fpath2 = ''.join(['MERRA2_400.inst1_2d_asm_Nx.', YYYY, MM, DD, '.nc4.nc?'])
    # Query part of the URL: the requested variables with their index ranges.
    fpath3 = (
        'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],'
        'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],'
        'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],'
        'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],'
        'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],'
        'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],'
        'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],'
        'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],'
        'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    )
    fpath = ''.join([fpath1, fpath2, fpath3])
    print(fpath)

    # This successfully connects
    # response = requests.get('https://urs.earthdata.nasa.gov', auth=HTTPBasicAuth('username', 'password'))
    # print(response)

    # This one does not
    credentials = HTTPBasicAuth('username', 'password')
    response = requests.get(fpath, auth=credentials)
    print(response)

Note - anyone can create a free account to access this data by going to this website

Bobby Heyer
  • 531
  • 5
  • 18

1 Answers1

3

Thank you @Stovfl for pointing me in the right direction.

That guidance led me to this website, which contains information on how to set up a session for Earthdata.

the updated complete code is below

import requests
from datetime import timedelta, date

def daterange(start_date, end_date):
    """Generate every day in the half-open interval [start_date, end_date)."""
    span = int((end_date - start_date).days)
    yield from (start_date + timedelta(offset) for offset in range(span))

# Date window to download. daterange() stops before end_date, so end_date
# itself is not requested.
start_date = date(2016, 1, 1)
end_date = date(2019, 7, 31)

# ***********************
# overriding requests.Session.rebuild_auth to maintain headers when redirected
# ***********************
class SessionWithHeaderRedirection(requests.Session):
    """Session that keeps the Authorization header across redirects involving AUTH_HOST.

    The username/password pair is stored as the session's ``auth`` so it is
    sent with every request made through this session.
    """

    # Host name of the login service the data server redirects to.
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        """Create the session and attach the given credentials."""
        super().__init__()
        self.auth = (username, password)

    def rebuild_auth(self, prepared_request, response):
        """Drop the Authorization header only when a redirect leaves for an unrelated host.

        The header is kept when the redirect stays on the same host, or when
        either end of the redirect is AUTH_HOST.
        """
        headers = prepared_request.headers
        if 'Authorization' not in headers:
            return

        source_host = requests.utils.urlparse(response.request.url).hostname
        target_host = requests.utils.urlparse(prepared_request.url).hostname

        # Same host on both sides of the redirect: nothing to strip.
        if source_host == target_host:
            return
        # Redirects to or from the auth host must keep the credentials.
        if self.AUTH_HOST in (source_host, target_host):
            return

        del headers['Authorization']


# Create the session with the user credentials that will be used to
# authenticate access to the data.
username = "USERNAME"
password = "PASSWORD"
session = SessionWithHeaderRedirection(username, password)

# The query part of the URL is the same for every day, so build it once
# instead of on every loop iteration.
base_url = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/'
subset = '[0:23][94:160][469:534]'
variables = (
    'U2M', 'TROPT', 'TROPPB', 'T2M', 'TQL', 'TOX', 'PS', 'V50M', 'DISPH',
    'TO3', 'TS', 'T10M', 'TROPPT', 'TQI', 'SLP', 'TQV', 'V2M', 'TROPQ',
    'V10M', 'U50M', 'U10M', 'QV2M', 'TROPPV', 'QV10M',
)
query = ','.join(name + subset for name in variables) + ',time,lat[94:160],lon[469:534]'

# ***********************
# Loop through Files
# ***********************
for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")

    # The same name is used in the request URL and for the local copy.
    filename = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc'
    url = base_url + YYYY + '/' + MM + '/' + filename + '?' + query
    # print(url)
    print(filename)

    try:
        # Submit the request using the session. With stream=True the connection
        # stays open until the body is consumed or the response is closed, so
        # the context manager guarantees it is released even when
        # raise_for_status() or the file write fails.
        with session.get(url, stream=True) as response:
            print(response.status_code)

            # raise an exception in case of http errors
            response.raise_for_status()

            # save the file in 1 MiB chunks so the whole body is never held in memory
            with open(filename, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    fd.write(chunk)

    except requests.exceptions.HTTPError as e:
        # An HTTP error for one day is reported and the loop moves on to the next day.
        print(e)
Bobby Heyer
  • 531
  • 5
  • 18