"""
This file is part of CLIMADA.
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
---
Define Landslide class.
"""
__all__ = ['Landslide']
import logging
import geopandas as gpd
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.stats import binom, poisson
import shapely
from climada.hazard import Hazard, Centroids
import climada.util.coordinates as u_coord
from climada.util.constants import DEF_CRS
#DEF_CRS = 'EPSG:4326'
LOGGER = logging.getLogger(__name__)
HAZ_TYPE = 'LS'
def sample_events(prob_matrix, n_years, dist='binom'):
"""
Sample yearly events for a specified time span (n_years) from
a matrix representing annual occurrence probabilities per grid cell
(prob_matrix). The matrix will usually be provided throught the
Landslide.from_prob() method and refer to grid-cells and their annual
sliding probabilities.
Events are drawn from the specified distribution (binomial or
poisson).
Returns n_year events for the whole grid, each event being representative
of all slides happening in 1 year throughout the entire grid-area.
Parameters
----------
prob_matrix : np.array()
matrix where each entry has a probability [0,1] of occurrence of
an event. Shape of array can be any and is determined by hazard area
and resolution of probability-source-file when called through
Landslide.from_prob().
n_years : int
the timespan of the probabilistic simulation in years. Each year will
result in 1 event.
dist : str, 'binom' or 'poisson'
distribution to sample from. Default is 'binom'.
Returns
-------
scipy.sparse.csr_matrix :
shape is (shape(prob_matrix),n_years). Each non-zero entry is 1 (re-
ferring to a 'hit' in that year and grid cell.)
See also
--------
sample_event_from_probs()
Landslide.from_prob()
"""
events = [
sample_event_from_probs(prob_matrix, n_samples=1, dist=dist)
for i in range(n_years)
]
return sparse.csr_matrix(events)
def sample_event_from_probs(prob_matrix, n_samples, dist):
"""
Samples the number of 'hits' (events) out of a given number of trials (n_samples)
from a specified distribution (poisson or binomial). Different (spatial)
occurrence probabilities are indicated in the probability matrix,
representative of cells across a grid.
Returns number of events / hits per grid cell (shape of prob_matrix).
Parameters
----------
prob_matrix : np.array()
matrix where each entry has a probability [0,1] of occurrence
Shape of array can be an. Usually it refers to probailities per grid
cells and is determined by hazard area
and resolution of probability-source-file when called through
Landslide.from_prob().
n_samples : int
the number of samples . Will be 1 if
called from sample_events().
dist : str, 'binom' or 'poisson'
distribution to sample random number of hits from, given n_samples and
prob_matrix.
For 'binom', the random variate is sampled from the binomial distribution
centred around expectation value μ = n_sampes*occurrence probability.
For 'poisson', the random variate is sampled from the poisson distribution
centred around expectation value λ = n_sampes*occurrence probability.
Returns
-------
ev_matrix : np.array()
array of same shape as prob_matrix. Each entry contains the number of
'hits' obtained during sampling-period n_years and will range
from [0,n_years]
See also
--------
sample_events()
scipy.stats.binom.rvs(), scipy.stats.poisson.rvs()
"""
if dist == 'binom':
ev_matrix = binom.rvs(n=n_samples, p=prob_matrix)
elif dist == 'poisson':
# λ (or μ in scipy)
mu = prob_matrix * n_samples
ev_matrix = poisson.rvs(mu)
return ev_matrix
[docs]
class Landslide(Hazard):
"""Landslide Hazard set generation."""
[docs]
def __init__(self):
"""Empty constructor."""
Hazard.__init__(self, HAZ_TYPE)
[docs]
@classmethod
def from_hist(cls, bbox, input_gdf, res=0.0083333):
"""
Set historic landslide (ls) raster hazard from historical point records,
for example as can be retrieved from the NASA COOLR initiative,
which is the largest global ls repository, for a specific geographic
extent.
Points are assigned to the gridcell they fall into, and the whole grid-
cell hence counts as equally affected.
Event frequencies from an incomplete dataset are not meaningful and
hence aren't set by default. probabilistic calculations!
Use the probabilistic method for this!
See tutorial for details; the global ls catalog from NASA COOLR can bedownloaded from
https://maps.nccs.nasa.gov/arcgis/apps/webappviewer/index.html?id=824ea5864ec8423fb985b33ee6bc05b7
Note
-----
The grid which is generated has the same projection as the geodataframe
with point occurrences. By default, this is EPSG:4326, which is a non-
projected, geographic CRS. This means, depending on where on the globe
the analysis is performed, the area per gridcell differs vastly.
Consider this when setting your resoluton (e.g. at the equator,
1° ~ 111 km). In turn, one can use projected CRS which preserve angles
and areas within the reference area for which they are defined. To do
this, reproject the input_gdf to the desired projection.
For more on projected & geographic CRS, see
https://desktop.arcgis.com/en/arcmap/10.3/guide-books/map-projections/about-projected-coordinate-systems.htm
Parameters
----------
bbox : tuple
(minx, miny, maxx, maxy) geographic extent of interest
input_gdf : str or or geopandas geodataframe
path to shapefile (.shp) with ls point data or already laoded gdf
res : float
resolution in units of the input_gdf crs of the final grid cells
which are created. Whith EPSG:4326, this is degrees. Default is
0.008333.
Returns
-------
Landslide : Landslide
instance filled with historic LS hazard
set for either point hazards or polygons with specified
surrounding extent.
"""
haz = cls()
if isinstance(input_gdf, gpd.GeoDataFrame):
LOGGER.info('Using pre-loaded gdf')
gdf_cropped = input_gdf.copy().cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]
else:
LOGGER.info('Reading in gdf from source %s', input_gdf)
gdf_cropped = gpd.read_file(input_gdf, bbox=bbox)
LOGGER.info('Generating a raster with resolution %s for box %s', res, bbox)
if not gdf_cropped.crs:
gdf_cropped.crs = DEF_CRS
haz.centroids = Centroids.from_pnt_bounds(bbox, res, crs=gdf_cropped.crs)
n_ev = len(gdf_cropped)
# assign lat-lon points of LS events to corresponding grid & flattened
# grid-index
grid_height, grid_width, grid_transform = u_coord.pts_to_raster_meta(bbox, (res, -res))
gdf_cropped['flat_ix'] = u_coord.assign_grid_points(
gdf_cropped.geometry.x, gdf_cropped.geometry.y,
grid_width, grid_height, grid_transform)
haz.intensity = sparse.csr_matrix(
(np.ones(n_ev), (np.arange(n_ev), gdf_cropped.flat_ix)),
shape=(n_ev, haz.centroids.size))
haz.fraction = haz.intensity.copy()
if hasattr(gdf_cropped, 'ev_date'):
haz.date = pd.to_datetime(gdf_cropped.ev_date, yearfirst=True)
else:
LOGGER.info('No event dates set from source')
if haz.date.size > 0:
haz.frequency = np.ones(n_ev)/(
(haz.date.max()-haz.date.min()).value/3.154e+16)
else:
LOGGER.warning('no event dates to derive proxy frequency from,'+
'setting arbitrarily to once per year.')
haz.frequency = np.ones(n_ev)
haz.units = ''
haz.event_id = np.arange(n_ev, dtype=int) + 1
haz.orig = np.ones(n_ev, bool)
haz.check()
return haz
[docs]
def set_ls_hist(self, *args, **kwargs):
"""
This function is deprecated, use Landslide.from_hist instead.
"""
LOGGER.warning("The use of Landlide.set_ls_hist is deprecated."
"Use Landslide.from_hist instead")
self.__dict__ = Landslide.from_hist(*args, **kwargs).__dict__
[docs]
@classmethod
def from_prob(cls, bbox, path_sourcefile, corr_fact=10e6, n_years=500,
dist='poisson'):
"""
Set probabilistic landslide hazard (fraction, intensity and
frequency) for a defined bounding box and time period from a raster.
The hazard data for which this function is explicitly written
is readily provided by UNEP & the Norwegian Geotechnical Institute
(NGI), and can be downloaded and unzipped from
https://preview.grid.unep.ch/index.php?preview=data&events=landslides&evcat=2&lang=eng
for precipitation-triggered landslide and from
https://preview.grid.unep.ch/index.php?preview=data&events=landslides&evcat=1&lang=eng
for earthquake-triggered landslides.
It works with any similar raster file.
Original data is given in expected annual probability and percentage
of pixel of occurrence of a potentially destructive landslide event
x 1000000 (so be sure to adjust this by setting the correction factor).
More details can be found in the landslide tutorial and under above-
mentioned links.
Events are sampled from annual occurrence probabilites via binomial or
poisson distribution. An event therefore includes all landslides
sampled to occur within a year for the given area.
intensity takes a binary value (occurrence or no occurrene of a LS);
frequency is set to 1 / n_years.
Impact functions, since they act on the intensity, should hence be in
the form of a step function,
defining impact for intensity 0 and (close to) 1.
Parameters
----------
bbox : tuple
(minx, miny, maxx, maxy) geographic extent of interest
path_sourcefile : str
path to UNEP/NGI ls hazard file (.tif)
corr_fact : float or int
factor by which to divide the values in the original probability
file, in case it is not scaled to [0,1]. Default is 1'000'000
n_years : int
sampling period
dist : str
distribution to sample from. 'poisson' (default) and 'binom'
Returns
-------
haz : climada.hazard.Landslide instance
probabilistic LS hazard
See also
--------
sample_events()
"""
haz = cls()
# raster with occurrence probs
haz.centroids.meta, prob_matrix = \
u_coord.read_raster(path_sourcefile, geometry=[shapely.geometry.box(*bbox, ccw=True)])
prob_matrix = prob_matrix.squeeze()/corr_fact
# sample events from probabilities
haz.intensity = sample_events(prob_matrix, n_years, dist)
haz.fraction = haz.intensity.copy()
haz.fraction[haz.intensity.nonzero()] = 1
haz.frequency = np.ones(n_years) / n_years
# meaningless, such that check() method passes:
haz.date = np.array([])
haz.event_name = np.array(range(n_years))
haz.event_id = np.array(range(n_years))
if not haz.centroids.meta['crs'].is_epsg_code:
haz.centroids.meta['crs'] = haz.centroids.meta['crs'
].from_user_input(DEF_CRS)
haz.centroids.set_geometry_points()
haz.check()
return haz
[docs]
def set_ls_prob(self, *args, **kwargs):
"""
This function is deprecated, use Landslide.from_prob instead.
"""
LOGGER.warning("The use of Landlide.set_ls_prob is deprecated."
"Use Landslide.from_prob instead")
self.__dict__ = Landslide.from_prob(*args, **kwargs).__dict__