Regridding from one irregular lat lon grid to another irregular lat lon grid

Question

I have two sets of satellite data. For both sets, I have the pixel geometry (latitude and longitude of each corner of the pixel). I would like to regrid one set to the other. Thus, my goal is area-weighted regridding from an irregular grid to another irregular grid. I am aware of xESMF, but am unsure if that is the best tool for the job. Perhaps iris area weighting regrid would be appropriate?

Huite Bootsma · Answer 1 · 2021-10-23T11:28:19.163

I've ran into similar things in the past. I'm on Windows, and xEMSF wasn't really an option for me.

I've written this package, and added some methods for computing grid to grid weights: https://github.com/Deltares/numba_celltree

(You can pip install it.)

The data structure can deal with fully unstructured 2D meshes, and expects the data in such a format. See the code below.

You will need to make some changes: your coordinates aren't named x and y most likely. You will also need to update the ugrid2d_topology function somewhat, since I'm assuming regular quadrilateral grids here (but they're irregular when seen in each others coordinate system).

It's still pretty straightforward, just make sure you have 2D array of vertices, and a face_node_connectivity array of shape (n_cell, 4) which maps for every face its four vertices. See this documention for a little more background: https://ugrid-conventions.github.io/ugrid-conventions/


import numpy as np
import pandas as pd
import pyproj
import xarray as xr
from numba_celltree import CellTree2d

FloatArray = np.ndarray
IntArray = np.ndarray


def _coord(da, dim):
    """
    Transform N xarray midpoints into N + 1 vertex edges
    """
    delta_dim = "d" + dim  # e.g. dx, dy, dz, etc.

    # If empty array, return empty
    if da[dim].size == 0:
        return np.array(())

    if delta_dim in da.coords:  # equidistant or non-equidistant
        dx = da[delta_dim].values
        if dx.shape == () or dx.shape == (1,):  # scalar -> equidistant
            dxs = np.full(da[dim].size, dx)
        else:  # array -> non-equidistant
            dxs = dx
        _check_monotonic(dxs, dim)

    else:  # undefined -> equidistant
        if da[dim].size == 1:
            raise ValueError(
                f"DataArray has size 1 along {dim}, so cellsize must be provided"
                " as a coordinate."
            )
        dxs = np.diff(da[dim].values)
        dx = dxs[0]
        atolx = abs(1.0e-4 * dx)
        if not np.allclose(dxs, dx, atolx):
            raise ValueError(
                f"DataArray has to be equidistant along {dim}, or cellsizes"
                " must be provided as a coordinate."
            )
        dxs = np.full(da[dim].size, dx)

    dxs = np.abs(dxs)
    x = da[dim].values
    if not da.indexes[dim].is_monotonic_increasing:
        x = x[::-1]
        dxs = dxs[::-1]

    # This assumes the coordinate to be monotonic increasing
    x0 = x[0] - 0.5 * dxs[0]
    x = np.full(dxs.size + 1, x0)
    x[1:] += np.cumsum(dxs)
    return x


def _ugrid2d_dataset(
    node_x: FloatArray,
    node_y: FloatArray,
    face_x: FloatArray,
    face_y: FloatArray,
    face_nodes: IntArray,
) -> xr.Dataset:
    ds = xr.Dataset()
    ds["mesh2d"] = xr.DataArray(
        data=0,
        attrs={
            "cf_role": "mesh_topology",
            "long_name": "Topology data of 2D mesh",
            "topology_dimension": 2,
            "node_coordinates": "node_x node_y",
            "face_node_connectivity": "face_nodes",
            "edge_node_connectivity": "edge_nodes",
        },
    )
    ds = ds.assign_coords(
        node_x=xr.DataArray(
            data=node_x,
            dims=["node"],
        )
    )
    ds = ds.assign_coords(
        node_y=xr.DataArray(
            data=node_y,
            dims=["node"],
        )
    )
    ds["face_nodes"] = xr.DataArray(
        data=face_nodes,
        coords={
            "face_x": ("face", face_x),
            "face_y": ("face", face_y),
        },
        dims=["face", "nmax_face"],
        attrs={
            "cf_role": "face_node_connectivity",
            "long_name": "Vertex nodes of mesh faces (counterclockwise)",
            "start_index": 0,
            "_FillValue": -1,
        },
    )
    ds.attrs = {"Conventions": "CF-1.8 UGRID-1.0"}
    return ds



def ugrid2d_topology(data: Union[xr.DataArray, xr.Dataset]) -> xr.Dataset:
    """
    Derive the 2D-UGRID quadrilateral mesh topology from a structured DataArray
    or Dataset, with (2D-dimensions) "y" and "x".

    Parameters
    ----------
    data: Union[xr.DataArray, xr.Dataset]
        Structured data from which the "x" and "y" coordinate will be used to
        define the UGRID-2D topology.

    Returns
    -------
    ugrid_topology: xr.Dataset
        Dataset with the required arrays describing 2D unstructured topology:
        node_x, node_y, face_x, face_y, face_nodes (connectivity).
    """
    # Transform midpoints into vertices
    # These are always returned monotonically increasing
    x = data["x"].values
    xcoord = _coord(data, "x")
    if not data.indexes["x"].is_monotonic_increasing:
        xcoord = xcoord[::-1]
    y = data["y"].values
    ycoord = _coord(data, "y")
    if not data.indexes["y"].is_monotonic_increasing:
        ycoord = ycoord[::-1]
    # Compute all vertices, these are the ugrid nodes
    node_y, node_x = (a.ravel() for a in np.meshgrid(ycoord, xcoord, indexing="ij"))
    face_y, face_x = (a.ravel() for a in np.meshgrid(y, x, indexing="ij"))
    linear_index = np.arange(node_x.size, dtype=np.int32).reshape(
        ycoord.size, xcoord.size
    )
    # Allocate face_node_connectivity
    nfaces = (ycoord.size - 1) * (xcoord.size - 1)
    face_nodes = np.empty((nfaces, 4))
    # Set connectivity in counterclockwise manner
    face_nodes[:, 0] = linear_index[:-1, 1:].ravel()  # upper right
    face_nodes[:, 1] = linear_index[:-1, :-1].ravel()  # upper left
    face_nodes[:, 2] = linear_index[1:, :-1].ravel()  # lower left
    face_nodes[:, 3] = linear_index[1:, 1:].ravel()  # lower right
    # Tie it together
    ds = _ugrid2d_dataset(node_x, node_y, face_x, face_y, face_nodes)
    return ds


def area_weighted_mean(
    da: xr.DataArray,
    destination_index: np.ndarray,
    source_index: np.ndarray,
    weights: np.ndarray,
):
    """
    Area weighted mean.

    Parameters
    ----------
    da: xr.DataArray
        Contains source data.
    destination_index: np.ndarray
        In which destination the overlap is located.
    source_index: np.ndarray
        In which source cell the overlap is located.
    weights: np.ndarray
        Area of each overlap.

    Returns
    -------
    destination_index: np.ndarray
    values: np.ndarray
    """
    values = da.data.ravel()[source_index]
    df = pd.DataFrame(
        {"dst": destination_index, "area": weights, "av": weights * values}
    )
    aggregated = df.groupby("dst").sum("sum", min_count=1)
    out = aggregated["av"] / aggregated["area"]
    return out.index.values, out.values


class Regridder:
    """
    Regridder to reproject and/or regrid rasters.  When no ``crs_source`` and
    ``crs_destination`` are provided, it is assumed that ``source`` and
    ``destination`` share the same coordinate system.

    Note that an area weighted regridding method only makes sense for projected
    (Cartesian!) coordinate systems.

    Parameters
    ----------
    source: xr.DataArray
        Source example. Must have dimensions ("y", "x").
    destination: xr.DataArray
        Destination example. Must have dimensions ("y", "x").
    crs_source: optional, default: None
    crs_destination: optional, default: None
    """

    def __init__(
        self,
        source: xr.DataArray,
        destination: xr.DataArray,
        crs_source=None,
        crs_destination=None,
    ):
        src = ugrid2d_topology(source)
        dst = ugrid2d_topology(destination)
        src_yy = src["node_y"].values
        src_xx = src["node_x"].values
        if crs_source and crs_destination:
            transformer = pyproj.Transformer.from_crs(
                crs_from=crs_source, crs_to=crs_destination, always_xy=True
            )
            src_xx, src_yy = transformer.transform(xx=src_xx, yy=src_yy)
        elif crs_source ^ crs_destination:
            raise ValueError("Received only one of (crs_source, crs_destination)")

        src_vertices = np.column_stack([src_xx, src_yy])
        src_faces = src["face_nodes"].values.astype(int)
        dst_vertices = np.column_stack((dst["node_x"].values, dst["node_y"].values))
        dst_faces = dst["face_nodes"].values
        celltree = CellTree2d(src_vertices, src_faces, fill_value=-1)

        self.source = source.copy()
        self.destination = destination.copy()
        (
            self.destination_index,
            self.source_index,
            self.weights,
        ) = celltree.intersect_faces(
            dst_vertices,
            dst_faces,
            fill_value=-1,
        )

    def regrid(self, da: xr.DataArray, fill_value=np.nan):
        """
        Parameters
        ----------
        da: xr.DataArray
            Data to regrid.
        fill_value: optional, default: np.nan
            Default value of the output grid, e.g. where no overlap occurs.

        Returns
        -------
        regridded: xr.DataArray
            Data of da, regridded using an area weighted mean.
        """
        src = self.source
        if not (np.allclose(da["y"], src["y"]) and np.allclose(da["x"], src["x"])):
            raise ValueError("da does not match source")
        index, values = area_weighted_mean(
            da,
            self.destination_index,
            self.source_index,
            self.weights,
        )
        data = np.full(self.destination.shape, fill_value)
        data.ravel()[index] = values
        out = self.destination.copy(data=data)
        out.name = da.name
        return out


# Example use
da = xr.open_dataarray("gw_abstraction_sum.nc")
like = xr.open_dataarray("example.nc")

regridder = Regridder(
    source=da, destination=like, crs_source=4326, crs_destination=3035
)
result = regridder.regrid(da)

result.to_netcdf("area-weighted_sum.nc")

Regridding from one irregular lat lon grid to another irregular lat lon grid

1 Answers1