"""Capabilities for quantile-based sensitivity analysis.
This module contains functions to calculate the global sensitivity measures based on
quantiles of the output introduced by Kucherenko et al.(2019). Both the brute force
and double loop reordering MC estimators are included.
"""
import chaospy as cp
import numpy as np
import pandas as pd
from scipy.stats import expon
from scipy.stats import norm
from scipy.stats import uniform
[docs]def mc_quantile_measures(
estimator,
func,
n_params,
loc,
scale,
dist_type,
n_draws,
sampling_scheme="sobol",
seed=0,
skip=0,
):
r"""Compute the MC/QMC estimators of quantile-based global sensitivity measures.
The algorithm is described in Section 4 of Kucherenko et al.(2019).
Parameters
----------
estimator : string
Specify the Monte Carlo estimator. One of ["brute force", "DLR"], where "DLR" denotes
to the double loop reordering approach.
func : callable
Objective function to estimate the quantile-based measures. Must be broadcastable.
n_params : int
Number of parameters of objective function.
loc : float or np.ndarray
The location(`loc`) keyword passed to `scipy.stats.norm`_ function to shift the
location of "standardized" distribution. Specifically, for normal distribution
it specifies the mean array with the length of `n_params`.
.. _scipy.stats.norm: https://docs.scipy.org/doc/scipy/reference/generated/
_scipy.stats.norm.html
scale : float or np.ndarray
The `scale` keyword passed to `scipy.stats.norm`_ function to adjust the scale of
"standardized" distribution. Specifically, for normal distribution it specifies
the covariance matrix of shape (n_params, n_params).
dist_type : str
The distribution type of inputs. Options are "Normal", "Exponential" and "Uniform".
n_draws : int
Number of Monte Carlo draws. For double loop reordering estimator,
S. Kucherenko and S. Song(2017). suggests that `n_draws` should always be equal
to :math:`2^p` to preserve the uniformity properties , where :math:`p`
is an integer. In this function :math:`p` should be integers between 6 and 15 if
`estimator` is "DLR".
sampling_scheme : str, optional
One of ["random", "sobol"], default "sobol".
seed : int, optional
Random number generator seed.
skip : int, optional
Number of values to skip of Sobol sequence. Default is `0`.
Returns
-------
df_measures : pd.DataFrame
DataFrame containing quantile-based sensitivity measures.
"""
# range of alpha
dalp = (0.98 - 0.02) / 30
alpha_grid = np.arange(0.02, 0.98 + dalp, dalp) # len(alpha_grid) = 31
# get the two independent groups of sample points
x, x_prime = _unconditional_samples(
n_draws,
n_params,
dist_type,
loc,
scale,
sampling_scheme,
seed=0,
skip=0,
)
# get the conditional sample sets
if estimator == "brute force":
x_mix = _bf_conditional_samples(x, x_prime)
elif estimator == "DLR":
x_mix = _dlr_conditional_samples(x)
else:
raise ValueError("Argument 'estimator' is not in {'brute force', 'DLR'}.")
# quantiles of output with unconditional input
quantile_y_x = _unconditional_quantile_y(x, alpha_grid, func)
# quantiles of output with conditional input
quantile_y_x_mix = _conditional_quantile_y(x_mix, func, alpha_grid)
# Get quantile based measures
q_1, q_2 = _quantile_measures(quantile_y_x, quantile_y_x_mix)
# Get normalized quantile based measures
norm_q_1, norm_q_2 = _normalized_quantile_measures(q_1, q_2)
# store results
dict_measures = {
"q_1": pd.DataFrame(q_1),
"q_2": pd.DataFrame(q_2),
"Q_1": pd.DataFrame(norm_q_1),
"Q_2": pd.DataFrame(norm_q_2),
}
df_measures = pd.concat(dict_measures.values(), axis=0)
df_measures.index = pd.MultiIndex.from_product(
[dict_measures.keys(), alpha_grid],
names=["Measures", "alpha"],
)
df_measures.columns = [f"x_{i + 1}" for i in range(n_params)]
return df_measures
def _unconditional_samples(
n_draws,
n_params,
dist_type,
loc,
scale,
sampling_scheme,
seed=0,
skip=0,
):
"""Generate two independent groups of sample points.
Parameters
----------
n_draws : int
Number of Monte Carlo draws.
n_params : int
Number of parameters of objective function.
dist_type : str
The distribution type of input. Options are "Normal", "Exponential" and "Uniform".
loc : float or np.ndarray
The location(`loc`) keyword passed to `scipy.stats.norm`_ function to shift the
location of "standardized" distribution.
scale : float or np.ndarray
The `scale` keyword passed to `scipy.stats.norm`_ function to adjust the scale of
"standardized" distribution.
sampling_scheme : str, optional
One of ["sobol", "random"]
seed : int, optional
Random number generator seed. Default is 0.
skip : int, optional
Number of values to skip of Sobol sequence. Default is `0`.
Returns
-------
x, x_prime : np.ndarray
Two arrays of shape (n_draws, n_params) with i.i.d draws from a joint distribution.
"""
# Generate uniform distributed samples
np.random.seed(seed)
if sampling_scheme == "sobol":
u = cp.generate_samples(
order=n_draws + skip,
domain=n_params,
rule="S",
).T
elif sampling_scheme == "random":
u = np.random.uniform(size=(n_draws, n_params))
else:
raise ValueError("Argument 'sampling_scheme' is not in {'sobol', 'random'}.")
skip = skip if sampling_scheme == "sobol" else 0
u = cp.generate_samples(order=n_draws, domain=2 * n_params, rule="S").T
u_1 = u[skip:, :n_params]
u_2 = u[skip:, n_params:]
# Transform uniform draws into a joint PDF
if dist_type == "Normal":
z = norm.ppf(u_1)
z_prime = norm.ppf(u_2)
cholesky = np.linalg.cholesky(scale)
x = loc + cholesky.dot(z.T).T
x_prime = loc + cholesky.dot(z_prime.T).T
elif dist_type == "Exponential":
x = expon.ppf(u_1, loc, scale)
x_prime = expon.ppf(u_2, loc, scale)
elif dist_type == "Uniform":
x = uniform.ppf(u_1, loc, scale)
x_prime = uniform.ppf(u_2, loc, scale)
else:
raise NotImplementedError
return x, x_prime
def _bf_conditional_samples(x, x_prime):
"""Generate mixed sample sets distributed accroding to a conditional PDF.
Parameters
----------
x : np.ndarray
Array with shape (n_draws, n_params).
x_prime : np.ndarray
Array with shape (n_draws, n_params).
Returns
-------
x_mix : np.ndarray
Mixed sample sets. Shape has the form (n_draws, n_params, n_draws, n_params).
"""
n_draws, n_params = x.shape
x_mix = np.zeros((n_draws, n_params, n_draws, n_params))
for i in range(n_params):
for j in range(n_draws):
x_mix[j, i] = x
x_mix[j, i, :, i] = x_prime[j, i]
return x_mix
def _dlr_conditional_samples(x):
"""Generate conditional sample sets.
Parameters
----------
x : np.ndarray
Draws from a joint distribution. Shape has the form (n_draws, n_params).
Returns
-------
x_mix : np.ndarray
Mixed sample sets. Shape has the form (m, n_params, n_draws, n_params), where m
is the number of conditional bins.
"""
n_draws, n_params = x.shape
# The dependence of m versus n_draws accroding to S. Kucherenko and S. Song(2017).
if n_draws == 2 ** 6:
m = 2 ** 3
elif n_draws in [2 ** 7, 2 ** 8, 2 ** 9]:
m = 2 ** 4
elif n_draws == 2 ** 10:
m = 2 ** 5
elif n_draws in [2 ** 11, 2 ** 12, 2 ** 13]:
m = 2 ** 6
elif n_draws in [2 ** 14, 2 ** 15]:
m = 2 ** 7
else:
raise NotImplementedError
conditional_bin = x[:m]
x_mix = np.zeros((m, n_params, n_draws, n_params))
# subdivide unconditional samples into M eaually bins, within each bin x_i being fixed.
for i in range(n_params):
for j in range(m):
x_mix[j, i] = x
x_mix[j, i, :, i] = conditional_bin[j, i]
return x_mix
def _unconditional_quantile_y(x, alpha_grid, func):
"""Return quantiles of outputs with unconditional input.
Parameters
----------
x : np.ndarray
Draws from a joint distribution. Shape has the form (n_draws, n_params).
alpha_grid : np.ndarray
A sequence of evenly spaced values on the interval (0, 1).
func : callable
Objective function to calculate the quantile-based measures. Must be broadcastable.
Returns
-------
quantile_y_x : np.ndarray
Quantiles of outputs corresponding to alpha with unconditional inputs.
Shape has the form (len(alpha_grid),).
"""
n_draws = x.shape[0]
# Equation 21a
y_x = func(x)
y_x_asc = np.sort(y_x)
q_index = (np.floor(alpha_grid * n_draws)).astype(int)
quantile_y_x = y_x_asc[q_index]
return quantile_y_x
def _conditional_quantile_y(x_mix, func, alpha_grid):
"""Return quantiles of outputs with conditional input.
Parameters
----------
x_mix : np.ndarray
Mixed draws. Shape has the form (m, n_params, n_draws, n_params).
func : callable
Objective function to calculate the quantile-based measures. Must be broadcastable.
alpha_grid : np.ndarray
A sequence of evenly spaced values on the interval (0, 1).
Returns
-------
quantile_y_x_mix : np.ndarray
Quantiles of output corresponding to alpha with conditional inputs. Shape has the form
(m, n_params, len(alpha_grid), 1), where m is the number of conditional bins.
"""
m, n_params, n_draws = x_mix.shape[:3]
y_x_mix = np.zeros((m, n_params, n_draws, 1))
y_x_mix_asc = np.zeros((m, n_params, n_draws, 1))
quantile_y_x_mix = np.zeros((m, n_params, len(alpha_grid), 1))
# Equation 21b/26. Get quantiles within each bin.
for i in range(n_params):
for j in range(m):
# values of conditional outputs
y_x_mix[j, i] = np.vstack(func(x_mix[j, i]))
y_x_mix_asc[j, i] = np.sort(y_x_mix[j, i], axis=0)
for pp, a in enumerate(alpha_grid):
quantile_y_x_mix[j, i, pp] = y_x_mix_asc[j, i][
(np.floor(a * n_draws)).astype(int)
] # quantiles corresponding to alpha
return quantile_y_x_mix
def _quantile_measures(quantile_y_x, quantile_y_x_mix):
"""Estimate the values of quantile based measures."""
m, n_params, len_alp = quantile_y_x_mix.shape[:3]
# initialization
q_1 = np.zeros((len_alp, n_params))
q_2 = np.zeros((len_alp, n_params))
delt = np.zeros((m, n_params, len_alp, 1))
# Equation 24&25&27&28
for j in range(m):
for i in range(n_params):
for pp in range(len_alp):
delt[j, i, pp] = quantile_y_x_mix[j, i, pp] - quantile_y_x[pp]
q_1[pp, i] = np.mean(np.absolute(delt[:, i, pp]))
q_2[pp, i] = np.mean(delt[:, i, pp] ** 2)
return q_1, q_2
def _normalized_quantile_measures(q_1, q_2):
"""Estimate the values of normalized quantile based measures."""
len_alp, n_params = q_1.shape
# initialization
sum_q_1 = np.zeros(len_alp)
sum_q_2 = np.zeros(len_alp)
norm_q_1 = np.zeros((len_alp, n_params))
norm_q_2 = np.zeros((len_alp, n_params))
# Equation 13 & 14
for pp in range(len_alp):
sum_q_1[pp] = np.sum(q_1[pp, :])
sum_q_2[pp] = np.sum(q_2[pp, :])
for i in range(n_params):
norm_q_1[pp, i] = q_1[pp, i] / sum_q_1[pp]
norm_q_2[pp, i] = q_2[pp, i] / sum_q_2[pp]
return norm_q_1, norm_q_2