Source code for climada_petals.hazard.rf_glofas.transform_ops

"""
This file is part of CLIMADA.

Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.

CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.

CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.

---

Transformations for the dantro data pipeline
"""

import logging
import re
from pathlib import Path
from typing import Optional, Union, List, Mapping, Any, Iterable, Tuple, Callable
from collections import deque
from copy import deepcopy

import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
from scipy.stats import gumbel_r
import xesmf as xe
from numba import guvectorize

from climada.util.coordinates import get_country_geometries, country_to_iso
from .cds_glofas_downloader import glofas_request, CDS_DOWNLOAD_DIR

LOGGER = logging.getLogger(__name__)


def sel_lon_lat_slice(target: xr.DataArray, source: xr.DataArray) -> xr.DataArray:
    """Select a lon/lat slice from 'target' using coordinates of 'source'"""
    return target.sel(
        {c: slice(*source[c][[0, -1]]) for c in ["longitude", "latitude"]}
    )

def rp_comp(
    sample: np.ndarray,
    loc: np.ndarray,
    scale: np.ndarray,
    max_rp: Optional[float] = np.inf,
):
    """Compute the return period from a right-handed Gumbel distribution

    All parameters can be arrays, in which case numpy broadcasting rules apply.

    The return period of a sample :math:`x` from an extreme value distribution is
    defined as :math:`(1 - \\mathrm{cdf}(x))^{-1}`, where :math:`\\mathrm{cdf}` is the
    cumulative distribution function of said distribution.

    Parameters
    ----------
    sample : array
        Samples for which to compute the return period
    loc : array
        Loc parameter of the Gumbel distribution
    scale : array
        Scale parameter of the distribution
    max_rp : float, optional
        The maximum value of return periods. This avoids returning infinite values.
        Defaults to ``np.inf`` (no maximum).

    Returns
    -------
    np.ndarray
        The return period(s) for the input parameters
    """
    cdf = gumbel_r.cdf(sample, loc=loc, scale=scale)
    rp_from_cdf = np.where(
        cdf >= 1.0, np.inf, 1.0 / np.fmax(1.0 - cdf, np.spacing(1))
    )
    return np.fmin(rp_from_cdf, max_rp)

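# Example (illustrative sketch, not part of the original module): computing return
# periods for a few discharge values from known Gumbel parameters. The helper name
# '_example_rp_comp' and all numbers below are hypothetical.
def _example_rp_comp():
    """Show how 'rp_comp' maps samples onto return periods."""
    loc, scale = 100.0, 20.0  # hypothetical Gumbel fit parameters
    sample = np.array([100.0, 150.0, 250.0])  # discharge values to evaluate
    # A sample equal to 'loc' (the distribution mode) yields a return period of
    # roughly 1 / (1 - exp(-1)) = 1.58; larger samples yield larger return periods,
    # capped at 'max_rp'.
    return rp_comp(sample, loc=loc, scale=scale, max_rp=1e4)
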
def reindex(
    target: xr.DataArray,
    source: xr.DataArray,
    tolerance: Optional[float] = None,
    fill_value: float = np.nan,
    assert_no_fill_value: bool = False,
) -> xr.DataArray:
    """Reindex target to source with nearest neighbor lookup

    Parameters
    ----------
    target : xr.DataArray
        Array to be reindexed.
    source : xr.DataArray
        Array whose coordinates are used for reindexing.
    tolerance : float (optional)
        Maximum distance between coordinates. If it is exceeded, the ``fill_value``
        is inserted instead of the nearest neighbor value.
    fill_value : float (optional)
        The fill value to use if coordinates are changed by a distance of more than
        ``tolerance``. Defaults to NaN.
    assert_no_fill_value : bool (optional)
        Throw an error if fill values are found in the data after reindexing. This
        will also throw an error if the fill value is present in the ``target``
        before reindexing (because the check afterwards would else not make sense).

    Returns
    -------
    target : xr.DataArray
        Target reindexed like 'source' with nearest neighbor lookup for the data.

    Raises
    ------
    ValueError
        If tolerance is exceeded when reindexing, in case ``assert_no_fill_value``
        is ``True``.
    ValueError
        If ``target`` already contains the ``fill_value`` before reindexing, in case
        ``assert_no_fill_value`` is ``True``.
    """

    def has_fill_value(arr):
        return arr.isin(fill_value).any() or (
            np.isnan(fill_value) and arr.isnull().any()
        )

    # Check for fill values before
    if assert_no_fill_value and has_fill_value(target):
        raise ValueError(
            f"Array '{target.name}' does already contain reindex fill value"
        )

    # Reindex operation
    target = target.reindex_like(
        source,
        method="nearest",
        tolerance=tolerance,
        copy=False,
        fill_value=fill_value,
    )

    # Check for fill values after
    if assert_no_fill_value and has_fill_value(target):
        raise ValueError(
            f"Reindexing '{target.name}' to '{source.name}' exceeds tolerance! "
            "Try interpolating the datasets or increasing the tolerance"
        )

    return target

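# Example (illustrative sketch, not part of the original module): aligning a
# parameter grid onto the grid of another array with a strict tolerance. The
# helper name and all coordinate values below are hypothetical.
def _example_reindex():
    """Nearest-neighbor reindexing with a tolerance check."""
    source = xr.DataArray(
        np.zeros((2, 2)),
        coords=dict(longitude=[0.0, 0.1], latitude=[50.0, 50.1]),
        dims=("longitude", "latitude"),
    )
    target = xr.DataArray(
        np.arange(4.0).reshape(2, 2),
        coords=dict(longitude=[0.0005, 0.1005], latitude=[50.0005, 50.1005]),
        dims=("longitude", "latitude"),
        name="loc",
    )
    # Coordinates differ by 5e-4 degrees, well within the 1e-3 tolerance, so the
    # nearest-neighbor values are taken over and no fill value is inserted.
    return reindex(
        target, source, tolerance=1e-3, fill_value=-1, assert_no_fill_value=True
    )
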
def merge_flood_maps(flood_maps: Mapping[str, xr.DataArray]) -> xr.DataArray:
    """Merge the flood map GeoTIFFs into one data array

    Adds a "zero" flood map (filled with NaN) for a return period of 1.

    Parameters
    ----------
    flood_maps : dict(str, xarray.DataArray)
        The mapping of GeoTIFF file paths to respective DataArray. Each flood map is
        identified through the folder containing it. The folders are expected to
        follow the naming scheme ``floodMapGL_rpXXXy``, where ``XXX`` indicates the
        return period of the respective map.
    """
    expr = re.compile(r"floodMapGL_rp(\d+)y")
    years = [int(expr.search(name).group(1)) for name in flood_maps]
    idx = np.argsort(years)
    darrs = list(flood_maps.values())
    darrs = [
        darrs[i].drop_vars("spatial_ref", errors="ignore").squeeze("band", drop=True)
        for i in idx
    ]

    # Add zero flood map
    # NOTE: Return period of 1 is the minimal value
    da_null_flood = xr.full_like(darrs[0], np.nan)
    darrs.insert(0, da_null_flood)

    # Concatenate and rename
    years = np.insert(np.array(years)[idx], 0, 1)
    da_flood_maps = xr.concat(darrs, pd.Index(years, name="return_period"))
    da_flood_maps = da_flood_maps.rename(x="longitude", y="latitude")
    return da_flood_maps.rename("flood_depth")

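# Example (illustrative sketch, not part of the original module): the input layout
# expected by 'merge_flood_maps'. In practice the GeoTIFFs would be opened (e.g.
# with rioxarray) and keyed by the folder name encoding the return period; the
# synthetic arrays and dictionary keys below are hypothetical.
def _example_merge_flood_maps():
    """Build the mapping expected by 'merge_flood_maps' from synthetic arrays."""

    def synthetic_map():
        return xr.DataArray(
            np.random.rand(1, 3, 3),
            coords=dict(band=[1], y=[50.2, 50.1, 50.0], x=[0.0, 0.1, 0.2]),
            dims=("band", "y", "x"),
        )

    flood_maps = {
        "floodMapGL_rp10y": synthetic_map(),
        "floodMapGL_rp100y": synthetic_map(),
    }
    # Result has a 'return_period' dimension [1, 10, 100] and lon/lat coordinates
    return merge_flood_maps(flood_maps)
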
def fit_gumbel_r(
    input_data: xr.DataArray,
    time_dim: str = "year",
    fit_method: str = "MM",
    min_samples: int = 2,
):
    """Fit a right-handed Gumbel distribution to the data

    Parameters
    ----------
    input_data : xr.DataArray
        The input time series to compute the distributions for. It must contain the
        dimension specified as ``time_dim``.
    time_dim : str
        The dimension indicating time. Defaults to ``year``.
    fit_method : str
        The method used for computing the distribution. Either ``MLE`` (Maximum
        Likelihood Estimation) or ``MM`` (Method of Moments).
    min_samples : int
        The number of finite samples along the time dimension required for a
        successful fit. If there are fewer samples, the fit result will be NaN.

    Returns
    -------
    xr.Dataset
        A dataset on the same grid as the input data with variables

        * ``loc``: The loc parameter of the fitted distribution (mode)
        * ``scale``: The scale parameter of the fitted distribution
        * ``samples``: The number of samples used to fit the distribution at this
          coordinate
    """

    def fit(time_series):
        # Count finite samples
        samples = np.isfinite(time_series)
        samples_count = np.count_nonzero(samples)
        if samples_count < min_samples:
            return np.nan, np.nan, 0

        # Mask array
        return (*gumbel_r.fit(time_series[samples], method=fit_method), samples_count)

    # Apply fitting
    loc, scale, samples = xr.apply_ufunc(
        fit,
        input_data,
        input_core_dims=[[time_dim]],
        output_core_dims=[[], [], []],
        exclude_dims={time_dim},
        vectorize=True,
        dask="parallelized",
        output_dtypes=[np.float64, np.float64, np.int32],
        dask_gufunc_kwargs=dict(allow_rechunk=True),
    )

    return xr.Dataset(dict(loc=loc, scale=scale, samples=samples))

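# Example (illustrative sketch, not part of the original module): fitting Gumbel
# parameters to synthetic yearly discharge maxima on a tiny grid. The helper name,
# grid, and distribution parameters are hypothetical.
def _example_fit_gumbel_r():
    """Fit a Gumbel distribution along the 'year' dimension of a small cube."""
    rng = np.random.default_rng(42)
    data = xr.DataArray(
        gumbel_r.rvs(loc=100.0, scale=20.0, size=(30, 2, 2), random_state=rng),
        coords=dict(
            year=np.arange(1991, 2021),
            longitude=[0.0, 0.1],
            latitude=[50.0, 50.1],
        ),
        dims=("year", "longitude", "latitude"),
    )
    # Returns a dataset with 'loc', 'scale', and 'samples' on the lon/lat grid
    return fit_gumbel_r(data, time_dim="year", fit_method="MM")
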
def download_glofas_discharge(
    product: str,
    date_from: str,
    date_to: Optional[str],
    num_proc: int = 1,
    download_path: Union[str, Path] = CDS_DOWNLOAD_DIR,
    countries: Optional[Union[List[str], str]] = None,
    preprocess: Optional[Callable] = None,
    open_mfdataset_kw: Optional[Mapping[str, Any]] = None,
    **request_kwargs,
) -> xr.DataArray:
    """Download the GloFAS data and return the resulting dataset

    Several parameters are passed directly to
    :py:func:`climada_petals.hazard.rf_glofas.cds_glofas_downloader.glofas_request`.
    See this function's documentation for further information.

    Parameters
    ----------
    product : str
        The string identifier of the product to download. See
        :py:func:`climada_petals.hazard.rf_glofas.cds_glofas_downloader.glofas_request`
        for supported products.
    date_from : str
        Earliest date to download. Specification depends on the ``product`` chosen.
    date_to : str or None
        Latest date to download. If ``None``, only download the ``date_from``.
        Specification depends on the ``product`` chosen.
    num_proc : int
        Number of parallel processes to use for downloading. Defaults to 1.
    download_path : str or pathlib.Path
        Directory to store the downloaded data. The directory (and all required
        parent directories!) will be created if it does not yet exist. Defaults to
        ``~/climada/data/glofas-discharge/``.
    countries : str or list of str, optional
        Countries to download data for. Uses the maximum extension of all countries
        for selecting the latitude/longitude range of data to download.
    preprocess : Callable, optional
        Callable for preprocessing each dataset before merging it into one dataset.
        Passed on to ``xarray.open_mfdataset``.
    open_mfdataset_kw : dict, optional
        Optional keyword arguments for the ``xarray.open_mfdataset`` function.
    request_kwargs:
        Keyword arguments for the Copernicus data store request.
    """
    # Create the download path if it does not yet exist
    LOGGER.debug("Preparing download directory: %s", download_path)
    download_path = Path(download_path)  # Make sure it is a Path
    download_path.mkdir(parents=True, exist_ok=True)

    # Determine area from 'countries'
    if countries is not None:
        LOGGER.debug("Choosing lat/lon bounds from countries %s", countries)
        # Fetch area and reorder appropriately
        # NOTE: 'area': north, west, south, east
        #       'extent': lon min (west), lon max (east), lat min (south), lat max (north)
        area = request_kwargs.get("area")
        if area is not None:
            LOGGER.debug("Subsetting country geometries with 'area'")
            area = [area[1], area[3], area[2], area[0]]

        # Fetch geometries and bounds of requested countries
        iso = country_to_iso(countries)
        geo = get_country_geometries(iso, extent=area)

        # NOTE: 'bounds': minx (west), miny (south), maxx (east), maxy (north)
        # NOTE: Explicitly cast to float to ensure that YAML parser can dump the data
        bounds = deque(map(float, geo.total_bounds.flat))
        bounds.rotate(1)

        # Insert into kwargs
        request_kwargs["area"] = list(bounds)

    # Request the data
    files = glofas_request(
        product=product,
        date_from=date_from,
        date_to=date_to,
        num_proc=num_proc,
        output_dir=download_path,
        request_kw=request_kwargs,
    )

    # Set arguments for 'open_mfdataset'
    open_kwargs = dict(
        chunks={}, combine="nested", concat_dim="time", preprocess=preprocess
    )
    if open_mfdataset_kw is not None:
        open_kwargs.update(open_mfdataset_kw)

    # Squeeze all dimensions except time
    arr = xr.open_mfdataset(files, **open_kwargs)["dis24"]
    dims = {dim for dim, size in arr.sizes.items() if size == 1} - {"time"}
    return arr.squeeze(dim=dims)

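# Example (illustrative sketch, not part of the original module): requesting a
# single day of GloFAS discharge clipped to one country. Running this requires
# valid Copernicus CDS credentials; the product name, date format, and country
# below are hypothetical placeholders following the 'glofas_request' conventions.
def _example_download_glofas_discharge():
    """Download one day of discharge data, clipped to Switzerland."""
    return download_glofas_discharge(
        product="historical",
        date_from="2023-08-01",
        date_to=None,
        countries="Switzerland",
        num_proc=1,
    )
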
def max_from_isel(
    array: xr.DataArray, dim: str, selections: List[Union[Iterable, slice]]
) -> xr.DataArray:
    """Compute the maximum over several selections of an array dimension"""
    if not all(isinstance(sel, (Iterable, slice)) for sel in selections):
        raise TypeError(
            "This function only works with iterables or slices as selection"
        )

    data = [array.isel({dim: sel}) for sel in selections]
    return xr.concat(
        [da.max(dim=dim, skipna=True) for da in data],
        dim=pd.Index(list(range(len(selections))), name="select"),
        # dim=xr.concat([da[dim].max() for da in data], dim=dim)
    )

def return_period(
    discharge: xr.DataArray,
    gev_loc: xr.DataArray,
    gev_scale: xr.DataArray,
    max_return_period: float = 1e4,
) -> xr.DataArray:
    """Compute the return period for a discharge from a Gumbel EV distribution fit

    Coordinates of the three datasets must match up to a tolerance of 1e-3 degrees.
    If they do not, an error is thrown.

    Parameters
    ----------
    discharge : xr.DataArray
        The discharge values to compute the return period for
    gev_loc : xr.DataArray
        The loc parameters for the Gumbel EV distribution
    gev_scale : xr.DataArray
        The scale parameters for the Gumbel EV distribution
    max_return_period : float
        The maximum return period to return. This avoids infinite values. Defaults
        to 1e4.

    Returns
    -------
    xr.DataArray
        The equivalent return periods for the input discharge and Gumbel EV
        distributions

    See Also
    --------
    :py:func:`climada_petals.hazard.rf_glofas.transform_ops.rp_comp`
    :py:func:`climada_petals.hazard.rf_glofas.transform_ops.return_period_resample`
    """
    reindex_kwargs = dict(tolerance=1e-3, fill_value=-1, assert_no_fill_value=True)
    gev_loc = reindex(gev_loc, discharge, **reindex_kwargs)
    gev_scale = reindex(gev_scale, discharge, **reindex_kwargs)

    # Apply and return
    return xr.apply_ufunc(
        rp_comp,
        discharge,
        gev_loc,
        gev_scale,
        max_return_period,
        dask="parallelized",
        output_dtypes=[np.float32],
    ).rename("Return Period")

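# Example (illustrative sketch, not part of the original module): combining a
# discharge field with previously fitted Gumbel parameters. The arguments are
# hypothetical; 'discharge' and 'gumbel_fits' would typically come from
# 'download_glofas_discharge' and 'fit_gumbel_r', respectively.
def _example_return_period(discharge, gumbel_fits):
    """Translate discharge into return periods using an existing Gumbel fit."""
    return return_period(
        discharge,
        gev_loc=gumbel_fits["loc"],
        gev_scale=gumbel_fits["scale"],
    )
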
def return_period_resample(
    discharge: xr.DataArray,
    gev_loc: xr.DataArray,
    gev_scale: xr.DataArray,
    gev_samples: xr.DataArray,
    bootstrap_samples: int,
    max_return_period: float = 1e4,
    fit_method: str = "MLE",
) -> xr.DataArray:
    """Compute resampled return periods for a discharge from a Gumbel EV distribution fit

    This function uses bootstrap resampling to incorporate the uncertainty in the EV
    distribution fit. Bootstrap resampling takes the fitted distribution, draws N
    samples from it (where N is the number of samples originally used to fit the
    distribution), and fits a new distribution onto these samples. This
    "bootstrapped" distribution is then used to compute the return period. Repeating
    this process yields an ensemble of distributions that captures the uncertainty
    in the original distribution fit.

    Coordinates of the four datasets must match up to a tolerance of 1e-3 degrees.
    If they do not, an error is thrown.

    Parameters
    ----------
    discharge : xr.DataArray
        The discharge values to compute the return period for
    gev_loc : xr.DataArray
        The loc parameters for the Gumbel EV distribution
    gev_scale : xr.DataArray
        The scale parameters for the Gumbel EV distribution
    gev_samples : xr.DataArray
        The samples used to fit the Gumbel EV distribution at every point
    bootstrap_samples : int
        The number of bootstrap samples to compute. Increasing this will improve the
        representation of uncertainty, but strongly increase computational costs
        later on.
    max_return_period : float
        The maximum return period to return. This avoids infinite values. Defaults
        to 1e4.
    fit_method : str
        Method for fitting the Gumbel EV during resampling. Available methods are
        the Method of Moments ``MM`` or the Maximum Likelihood Estimation ``MLE``
        (default).

    Returns
    -------
    xr.DataArray
        The equivalent return periods for the input discharge and Gumbel EV
        distributions. The data array will have an additional dimension ``sample``,
        representing the bootstrap samples for every point.

    See Also
    --------
    :py:func:`climada_petals.hazard.rf_glofas.transform_ops.rp_comp`
    :py:func:`climada_petals.hazard.rf_glofas.transform_ops.return_period`
    """
    reindex_kwargs = dict(tolerance=1e-3, fill_value=-1, assert_no_fill_value=True)
    gev_loc = reindex(gev_loc, discharge, **reindex_kwargs)
    gev_scale = reindex(gev_scale, discharge, **reindex_kwargs)
    gev_samples = reindex(gev_samples, discharge, **reindex_kwargs).astype("int32")

    # All but 'longitude' and 'latitude' are core dimensions for this operation
    core_dims = list(discharge.dims)
    core_dims.remove("longitude")
    core_dims.remove("latitude")

    # Define input array layout
    # NOTE: This depends on the actual core dimensions put in, so we have to do this
    #       programmatically.
    # num_core_dims = len(core_dims)
    # arr_str_in = "[" + ", ".join([":" for _ in range(num_core_dims)]) + "]"
    # dims_str_in = "(" + ", ".join([f"c_{i}" for i in range(num_core_dims)]) + ")"
    # arr_str_out = arr_str_in[:-1] + ", :]"
    # dims_str_out = dims_str_in[:-1] + ", samples)"
    # print(arr_str_in, dims_str_in)
    # print(arr_str_out, dims_str_out)

    # Dummy array
    # dummy = xr.DataArray(
    #     np.empty((bootstrap_samples)),
    #     coords=dict(samples=list(range(bootstrap_samples))),
    # )

    # Define the vectorized function
    # @guvectorize(
    #     (
    #         f"(float32{arr_str_in}, float64, float64, int32,"
    #         f"float64,float64[:], float32{arr_str_out})"
    #     ),
    #     f"{dims_str_in}, (), (), (), (), (samples) -> {dims_str_out}",
    #     # nopython=True,
    # )
    def rp_sampling(
        dis: np.ndarray,
        loc: float,
        scale: float,
        samples: int,
        max_rp: float,
    ):
        """Compute multiple return periods using bootstrap sampling

        This function does not support broadcasting on the ``loc`` and ``scale``
        parameters.
        """
        # Return NaNs if we have no reliable samples
        finite_input = all((np.isfinite(x) for x in (loc, scale, samples)))
        if samples < 1 or not finite_input:
            return np.full((bootstrap_samples,) + dis.shape, np.nan)

        # Resample by drawing samples and re-fitting
        def resample_params():
            return gumbel_r.fit(
                gumbel_r.rvs(loc=loc, scale=scale, size=samples),
                method=fit_method,
            )

        # Resample the distribution and compute return periods from these resamples
        return np.array(
            [
                rp_comp(dis, *resample_params(), max_rp)
                for _ in range(bootstrap_samples)
            ],
            dtype=np.float32,
        )

    # Apply and return
    # NOTE: 'rp_sampling' requires scalar 'loc' and 'scale' parameters, so we
    #       define all but 'longitude' and 'latitude' dimensions as core dimensions
    # core_dims = set(discharge.dims) - {"longitude", "latitude"}
    return (
        xr.apply_ufunc(
            rp_sampling,
            discharge,
            gev_loc,
            gev_scale,
            gev_samples,
            max_return_period,
            input_core_dims=[list(core_dims), [], [], [], []],
            output_core_dims=[["sample"] + list(core_dims)],
            vectorize=True,
            dask="parallelized",
            output_dtypes=[np.float32],
            dask_gufunc_kwargs=dict(
                output_sizes=dict(sample=bootstrap_samples), allow_rechunk=True
            ),
        )
        .rename("Return Period")
        .assign_coords(sample=np.arange(bootstrap_samples))
        .transpose(..., "sample")
    )

def interpolate_space(
    return_period: xr.DataArray,
    flood_maps: xr.DataArray,
    method: str = "linear",
) -> xr.DataArray:
    """Interpolate the return period in space onto the flood maps grid"""
    # Select lon/lat for flood maps
    flood_maps = sel_lon_lat_slice(flood_maps, return_period)

    # Interpolate the return period
    return return_period.interp(
        coords=dict(
            longitude=flood_maps["longitude"], latitude=flood_maps["latitude"]
        ),
        method=method,
        kwargs=dict(fill_value=None),  # Extrapolate
    )

def regrid(
    return_period: xr.DataArray,
    flood_maps: xr.DataArray,
    method: str = "bilinear",
    regridder: Optional[xe.Regridder] = None,
    return_regridder: bool = False,
) -> Union[xr.DataArray, Tuple[xr.DataArray, xe.Regridder]]:
    """Regrid the return period onto the flood maps grid"""
    # Select lon/lat for flood maps
    flood_maps = sel_lon_lat_slice(flood_maps, return_period)

    # Mask return period so NaNs are not propagated
    rp = return_period.to_dataset(name="data")
    dims_to_remove = set(rp.sizes.keys()) - {"longitude", "latitude"}
    dims_to_remove = {dim: 0 for dim in dims_to_remove}
    rp["mask"] = xr.where(rp["data"].isel(dims_to_remove).isnull(), 0, 1)

    # NOTE: Masking here would omit all return periods outside flood plains
    #       (This might be desirable at some point?)
    flood = flood_maps.to_dataset(name="data")
    # flood["mask"] = xr.where(flood["data"].isel(return_period=-1).isnull(), 0, 1)

    # Perform regridding
    if regridder is None:
        regridder = xe.Regridder(
            rp,
            flood,
            method=method,
            extrap_method="nearest_s2d",
            # unmapped_to_nan=False,
        )
    return_period_regridded = regridder(return_period).rename(return_period.name)

    if return_regridder:
        return return_period_regridded, regridder

    return return_period_regridded

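# Example (illustrative sketch, not part of the original module): reusing the
# regridder when several return period fields share the same source and target
# grids, which avoids recomputing the expensive xESMF weights. 'rp_fields' and
# 'flood_maps' are hypothetical inputs.
def _example_regrid_reuse(rp_fields, flood_maps):
    """Regrid a sequence of return period arrays with a shared regridder."""
    regridded = []
    regridder = None
    for rp_field in rp_fields:
        rp_regridded, regridder = regrid(
            rp_field, flood_maps, regridder=regridder, return_regridder=True
        )
        regridded.append(rp_regridded)
    return regridded
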
def apply_flopros(
    flopros_data: gpd.GeoDataFrame,
    return_period: Union[xr.DataArray, xr.Dataset],
    layer: str = "MerL_Riv",
) -> Union[xr.DataArray, xr.Dataset]:
    """Restrict the given return periods using FLOPROS data

    The FLOPROS database describes the regional protection to river flooding based
    on a return period. Users can choose from different database layers. For each
    coordinate in ``return_period``, the value from the respective FLOPROS database
    layer is selected. Any ``return_period`` lower than or equal to the FLOPROS
    protection value is discarded and set to ``NaN``.

    Parameters
    ----------
    flopros_data : gpd.GeoDataFrame
        The FLOPROS data as a GeoDataFrame (e.g. unpacked from a
        dantro.containers.PassthroughContainer)
    return_period : xr.DataArray or xr.Dataset
        The return periods to be restricted by the FLOPROS data
    layer : str
        The FLOPROS database layer to evaluate

    Returns
    -------
    xr.DataArray or xr.Dataset
        The ``return_period`` data with all values below the local FLOPROS
        protection threshold set to ``NaN``.
    """
    # Make GeoDataframe from existing geometry
    latitude = return_period["latitude"].values
    longitude = return_period["longitude"].values
    lon, lat = np.meshgrid(longitude, latitude, indexing="ij")
    df_geometry = gpd.GeoDataFrame(
        geometry=gpd.points_from_xy(lon.flat, lat.flat), crs="EPSG:4326"
    )

    # Merge the DataFrames, setting the FLOPROS value for each point
    df_merged = df_geometry.sjoin(flopros_data, how="left", predicate="within")

    # Available layers
    # layers = [
    #     "DL_Min_Co",
    #     "DL_Max_Co",
    #     "PL_Min_Co",
    #     "PL_Max_Co",
    #     "MerL_Riv",
    #     "DL_Min_Riv",
    #     "DL_Max_Riv",
    #     "PL_Min_Riv",
    #     "PL_Max_Riv",
    #     "ModL_Riv",
    # ]

    def data_array_from_layer(col_name):
        """Create a xr.DataArray from a GeoDataFrame column"""
        return xr.DataArray(
            data=df_merged[col_name]
            .to_numpy(dtype=np.float32)
            .reshape((longitude.size, latitude.size)),
            dims=["longitude", "latitude"],
            coords=dict(longitude=longitude, latitude=latitude),
        )

    # Apply the FLOPROS protection
    flopros = data_array_from_layer(layer)
    return return_period.where(return_period > flopros)

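# Example (illustrative sketch, not part of the original module): restricting return
# periods with a toy protection polygon. Real FLOPROS data would be loaded from the
# published shapefile; the single square polygon and the 50-year protection level
# below are hypothetical.
def _example_apply_flopros():
    """Mask return periods at or below a protection level of 50 years."""
    from shapely.geometry import box

    flopros_data = gpd.GeoDataFrame(
        {"MerL_Riv": [50.0]}, geometry=[box(-1.0, 49.0, 1.0, 51.0)], crs="EPSG:4326"
    )
    return_periods = xr.DataArray(
        np.array([[10.0, 80.0], [40.0, 200.0]]),
        coords=dict(longitude=[0.0, 0.1], latitude=[50.0, 50.1]),
        dims=("longitude", "latitude"),
    )
    # The values 10 and 40 are at or below the 50-year protection and become NaN
    return apply_flopros(flopros_data, return_periods, layer="MerL_Riv")
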
def flood_depth(
    return_period: Union[xr.Dataset, xr.DataArray], flood_maps: xr.DataArray
) -> Union[xr.Dataset, xr.DataArray]:
    """Compute the flood depth from a return period and flood maps.

    At each lat/lon coordinate, take the return period(s) and use them to
    interpolate the flood maps at this position in the return period dimension. Take
    the interpolated values and return them as flood hazard footprints. Works with
    arbitrarily high dimensions in the ``return_period`` array. Interpolation is
    linear.

    Parameters
    ----------
    return_period : xr.DataArray or xr.Dataset
        The return periods for which to compute the flood depth. If this is a
        dataset, the function will compute the flood depth for each variable.
    flood_maps : xr.DataArray
        Maps that indicate flood depth at latitude/longitude/return period
        coordinates.

    Returns
    -------
    xr.DataArray or xr.Dataset
        The flood depths for the given return periods.
    """
    # Select lon/lat for flood maps
    flood_maps = sel_lon_lat_slice(flood_maps, return_period)

    # Clip infinite return periods
    return_period = return_period.clip(
        min=1, max=flood_maps["return_period"].max(), keep_attrs=True
    )

    # All but 'longitude' and 'latitude' are core dimensions for this operation
    core_dims = list(return_period.dims)
    core_dims.remove("longitude")
    core_dims.remove("latitude")

    # Define input array layout
    # NOTE: This depends on the actual core dimensions put in, so we have to do this
    #       programmatically.
    num_core_dims = len(core_dims)
    arr_str = "[" + ", ".join([":" for _ in range(num_core_dims)]) + "]"
    core_dims_str = "(" + ", ".join([f"c_{i}" for i in range(num_core_dims)]) + ")"

    # Define the vectorized function
    @guvectorize(
        f"(float32{arr_str}, float64[:], int64[:], float32{arr_str})",
        f"{core_dims_str}, (j), (j) -> {core_dims_str}",
        nopython=True,
    )
    def interpolate(return_period, hazard, return_periods, out_depth):
        """Linearly interpolate the hazard to a given return period

        Args:
            return_period (float): The return period to evaluate the hazard at
            hazard (np.array): The hazard at given return periods (dependent var)
            return_periods (np.array): The given return periods (independent var)

        Returns:
            float: The hazard at the requested return period.

        The hazard cannot become negative. Values beyond the given return periods
        range are extrapolated.
        """
        # Shortcut for only NaNs
        # NOTE: After rebuilding the hazard maps, the "1:" can be removed
        if np.all(np.isnan(hazard[1:])):
            out_depth[:] = np.full_like(return_period, np.nan)
            return

        # Set NaNs to zero
        # NOTE: NaNs should be grouped at lower end of 'return_periods', so this
        #       should be sane.
        # hazard = np.nan_to_num(hazard)
        hazard = np.where(np.isnan(hazard), 0.0, hazard)

        # Use extrapolation and have 0.0 as minimum value
        out_depth[:] = np.interp(return_period, return_periods, hazard)
        out_depth[:] = np.maximum(out_depth, 0.0)

    # Perform operation
    out = xr.apply_ufunc(
        interpolate,
        return_period,
        flood_maps,
        flood_maps["return_period"],
        input_core_dims=[core_dims, ["return_period"], ["return_period"]],
        output_core_dims=[core_dims],
        dask="parallelized",
        output_dtypes=[np.float32],
        dask_gufunc_kwargs=dict(allow_rechunk=True),
    )
    if isinstance(out, xr.DataArray):
        out = out.rename("Flood Depth")
    return out

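# Example (illustrative sketch, not part of the original module): evaluating flood
# depth on a tiny synthetic grid. Real inputs would come from 'merge_flood_maps' and
# 'regrid'; the grid, depths, and return periods below are hypothetical.
def _example_flood_depth():
    """Interpolate flood depth for synthetic return periods and flood maps."""
    longitude = [0.0, 0.1]
    latitude = [50.0, 50.1]
    flood_maps = xr.DataArray(
        np.array(
            [
                np.zeros((2, 2)),       # 1-year map: no inundation
                np.full((2, 2), 1.0),   # 10-year map: 1 m everywhere
                np.full((2, 2), 2.5),   # 100-year map: 2.5 m everywhere
            ]
        ),
        coords=dict(
            return_period=np.array([1, 10, 100]),
            longitude=longitude,
            latitude=latitude,
        ),
        dims=("return_period", "longitude", "latitude"),
    )
    return_periods = xr.DataArray(
        np.array([[[5.0, 20.0], [50.0, 200.0]]], dtype=np.float32),
        coords=dict(time=[0], longitude=longitude, latitude=latitude),
        dims=("time", "longitude", "latitude"),
    )
    # Return periods above the highest map (100 years) are clipped, so the value of
    # 200 yields the 100-year depth of 2.5 m
    return flood_depth(return_periods, flood_maps)
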
def save_file(
    data: Union[xr.Dataset, xr.DataArray],
    output_path: Union[Path, str],
    encoding: Optional[Mapping[str, Any]] = None,
    engine: Optional[str] = "netcdf4",
    **encoding_defaults,
):
    """Save xarray data as a file with default compression

    Calls ``data.to_netcdf``. See
    https://docs.xarray.dev/en/stable/generated/xarray.Dataset.to_netcdf.html for the
    documentation of the underlying method.

    Parameters
    ----------
    data : xr.Dataset or xr.DataArray
        The data to be stored in the file
    output_path : pathlib.Path or str
        The file path to store the data into. If it does not contain a suffix,
        ``.nc`` is automatically appended. The enclosing folder must already exist.
    encoding : dict (optional)
        Encoding settings for every data variable. Will update the default settings.
    engine : str (optional)
        The engine used for writing the file. Defaults to ``"netcdf4"``.
    encoding_defaults
        Encoding settings shared by all data variables. This will update the default
        encoding settings, which are
        ``dict(dtype="float32", zlib=False, complevel=4)``.
    """
    # Promote to Dataset for accessing the data_vars
    if isinstance(data, xr.DataArray):
        data = data.to_dataset()

    # Store encoding
    default_encoding = dict(dtype="float32", zlib=False, complevel=4)
    default_encoding.update(**encoding_defaults)
    enc = {var: deepcopy(default_encoding) for var in data.data_vars}
    if encoding is not None:
        for key, settings in encoding.items():
            enc[key].update(settings)

    # Sanitize output path and write file
    output_path = Path(output_path)
    if not output_path.suffix:
        output_path = output_path.with_suffix(".nc")
    data.to_netcdf(output_path, encoding=enc, engine=engine)

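# Example (illustrative sketch, not part of the original module): storing a flood
# depth DataArray with a per-variable encoding override. The file name, variable
# name, and compression level below are hypothetical.
def _example_save_file(flood_depth_data):
    """Write a DataArray to 'flood-depth.nc' with zlib compression enabled."""
    save_file(
        flood_depth_data.rename("flood_depth"),
        output_path="flood-depth",  # '.nc' suffix is appended automatically
        encoding={"flood_depth": dict(zlib=True)},
        complevel=9,  # shared encoding default passed via 'encoding_defaults'
    )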