Source code for gstools.transform.array

"""
GStools subpackage providing array transformations.

.. currentmodule:: gstools.transform.array

The following functions are provided

Transformations
^^^^^^^^^^^^^^^

.. autosummary::
   array_discrete
   array_boxcox
   array_zinnharvey
   array_force_moments
   array_to_lognormal
   array_to_uniform
   array_to_arcsin
   array_to_uquad
"""
# pylint: disable=C0103, C0123, R0911
from warnings import warn

import numpy as np
from scipy.special import erf, erfinv

__all__ = [
    "array_discrete",
    "array_boxcox",
    "array_zinnharvey",
    "array_force_moments",
    "array_to_lognormal",
    "array_to_uniform",
    "array_to_arcsin",
    "array_to_uquad",
]


[docs]def array_discrete(
    field, values, thresholds="arithmetic", mean=None, var=None
):
    """
    Discrete transformation.

    After this transformation, the field has only `len(values)` discrete
    values.

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    values : :any:`numpy.ndarray`
        The discrete values the field will take
    thresholds : :class:`str` or :any:`numpy.ndarray`, optional
        the thresholds, where the value classes are separated
        possible values are:
        * "arithmetic": the mean of the 2 neighbouring values
        * "equal": divide the field into equal parts
        * an array of explicitly given thresholds
        Default: "arithmetic"
    mean : :class:`float`or :any:`None`
        Mean of the field for "equal" thresholds. Default: np.mean(field)
    var : :class:`float`or :any:`None`
        Variance of the field for "equal" thresholds. Default: np.var(field)

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    field = np.asarray(field)
    if thresholds == "arithmetic":
        # just in case, sort the values
        values = np.sort(values)
        thresholds = (values[1:] + values[:-1]) / 2
    elif thresholds == "equal":
        mean = np.mean(field) if mean is None else float(mean)
        var = np.var(field) if var is None else float(var)
        values = np.asarray(values)
        n = len(values)
        p = np.arange(1, n) / n  # n-1 equal subdivisions of [0, 1]
        rescale = np.sqrt(var * 2)
        # use quantile of the normal distribution to get equal ratios
        thresholds = mean + rescale * erfinv(2 * p - 1)
    else:
        if len(values) != len(thresholds) + 1:
            raise ValueError(
                "discrete transformation: len(values) != len(thresholds) + 1"
            )
        values = np.asarray(values)
        thresholds = np.asarray(thresholds)
    # check thresholds
    if not np.all(thresholds[:-1] < thresholds[1:]):
        raise ValueError(
            "discrete transformation: thresholds need to be ascending"
        )
    # use a separate result so the intermediate results are not affected
    result = np.empty_like(field)
    # handle edge cases
    result[field <= thresholds[0]] = values[0]
    result[field > thresholds[-1]] = values[-1]
    for i, value in enumerate(values[1:-1]):
        result[
            np.logical_and(thresholds[i] < field, field <= thresholds[i + 1])
        ] = value
    return result


[docs]def array_boxcox(field, lmbda=1, shift=0):
    """
    (Inverse) Box-Cox transformation to denormalize data.

    After this transformation, the again Box-Cox transformed field is normal
    distributed.

    See: https://en.wikipedia.org/wiki/Power_transform#Box%E2%80%93Cox_transformation

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    lmbda : :class:`float`, optional
        The lambda parameter of the Box-Cox transformation.
        For ``lmbda=0`` one obtains the log-normal transformation.
        Default: ``1``
    shift : :class:`float`, optional
        The shift parameter from the two-parametric Box-Cox transformation.
        The field will be shifted by that value before transformation.
        Default: ``0``
    """
    field = np.asarray(field)
    result = field + shift
    if np.isclose(lmbda, 0):
        return array_to_lognormal(result)
    if np.min(result) < -1 / lmbda:
        warn("Box-Cox: Some values will be cut off!")
    return (np.maximum(lmbda * result + 1, 0)) ** (1 / lmbda)


[docs]def array_zinnharvey(field, conn="high", mean=None, var=None):
    """
    Zinn and Harvey transformation to connect low or high values.

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    conn : :class:`str`, optional
        Desired connectivity. Either "low" or "high".
        Default: "high"
    mean : :class:`float` or :any:`None`, optional
        Mean of the given field. If None is given, the mean will be calculated.
        Default: :any:`None`
    var : :class:`float` or :any:`None`, optional
        Variance of the given field.
        If None is given, the variance will be calculated.
        Default: :any:`None`

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    field = np.asarray(field)
    mean = np.mean(field) if mean is None else float(mean)
    var = np.var(field) if var is None else float(var)
    result = np.abs((field - mean) / np.sqrt(var))
    result = np.sqrt(2) * erfinv(2 * erf(result / np.sqrt(2)) - 1)
    if conn == "high":
        result = -result
    return result * np.sqrt(var) + mean


[docs]def array_force_moments(field, mean=0, var=1):
    """
    Force moments of a normal distributed field.

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    mean : :class:`float`, optional
        Desired mean of the field.
        Default: 0
    var : :class:`float` or :any:`None`, optional
        Desired variance of the field.
        Default: 1

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    field = np.asarray(field)
    var_in = np.var(field)
    mean_in = np.mean(field)
    rescale = np.sqrt(var / var_in)
    return rescale * (field - mean_in) + mean


[docs]def array_to_lognormal(field):
    """
    Transform normal distribution to log-normal distribution.

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    return np.exp(field)


[docs]def array_to_uniform(field, mean=None, var=None, low=0.0, high=1.0):
    """
    Transform normal distribution to uniform distribution on [low, high].

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    mean : :class:`float` or :any:`None`, optional
        Mean of the given field. If None is given, the mean will be calculated.
        Default: :any:`None`
    var : :class:`float` or :any:`None`, optional
        Variance of the given field.
        If None is given, the variance will be calculated.
        Default: :any:`None`
    low : :class:`float`, optional
        Lower bound for the uniform distribution.
        Default: 0.0
    high : :class:`float`, optional
        Upper bound for the uniform distribution.
        Default: 1.0

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    field = np.asarray(field)
    mean = np.mean(field) if mean is None else float(mean)
    var = np.var(field) if var is None else float(var)
    return (
        0.5 * (1 + erf((field - mean) / np.sqrt(2 * var))) * (high - low) + low
    )


[docs]def array_to_arcsin(field, mean=None, var=None, a=None, b=None):
    """
    Transform normal distribution to arcsin distribution.

    See: https://en.wikipedia.org/wiki/Arcsine_distribution

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    mean : :class:`float` or :any:`None`, optional
        Mean of the given field. If None is given, the mean will be calculated.
        Default: :any:`None`
    var : :class:`float` or :any:`None`, optional
        Variance of the given field.
        If None is given, the mean will be calculated.
        Default: :any:`None`
    a : :class:`float`, optional
        Parameter a of the arcsin distribution (lower bound).
        Default: keep mean and variance
    b : :class:`float`, optional
        Parameter b of the arcsin distribution (upper bound).
        Default: keep mean and variance

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    field = np.asarray(field)
    mean = np.mean(field) if mean is None else float(mean)
    var = np.var(field) if var is None else float(var)
    a = mean - np.sqrt(2.0 * var) if a is None else float(a)
    b = mean + np.sqrt(2.0 * var) if b is None else float(b)
    return _uniform_to_arcsin(array_to_uniform(field, mean, var), a, b)


[docs]def array_to_uquad(field, mean=None, var=None, a=None, b=None):
    """
    Transform normal distribution to U-quadratic distribution.

    See: https://en.wikipedia.org/wiki/U-quadratic_distribution

    Parameters
    ----------
    field : :class:`numpy.ndarray`
        Normal distributed values.
    mean : :class:`float` or :any:`None`, optional
        Mean of the given field. If None is given, the mean will be calculated.
        Default: :any:`None`
    var : :class:`float` or :any:`None`, optional
        Variance of the given field.
        If None is given, the variance will be calculated.
        Default: :any:`None`
    a : :class:`float`, optional
        Parameter a of the U-quadratic distribution (lower bound).
        Default: keep mean and variance
    b : :class:`float`, optional
        Parameter b of the U-quadratic distribution (upper bound).
        Default: keep mean and variance

    Returns
    -------
    :class:`numpy.ndarray`
        Transformed field.
    """
    field = np.asarray(field)
    mean = np.mean(field) if mean is None else float(mean)
    var = np.var(field) if var is None else float(var)
    a = mean - np.sqrt(5.0 / 3.0 * var) if a is None else float(a)
    b = mean + np.sqrt(5.0 / 3.0 * var) if b is None else float(b)
    return _uniform_to_uquad(array_to_uniform(field, mean, var), a, b)


def _uniform_to_arcsin(field, a=0, b=1):
    """
    PPF of your desired distribution.

    The PPF is the inverse of the CDF and is used to sample a distribution
    from uniform distributed values on [0, 1]

    in this case: the arcsin distribution
    See: https://en.wikipedia.org/wiki/Arcsine_distribution
    """
    field = np.asarray(field)
    return (b - a) * np.sin(np.pi * 0.5 * field) ** 2 + a


def _uniform_to_uquad(field, a=0, b=1):
    """
    PPF of your desired distribution.

    The PPF is the inverse of the CDF and is used to sample a distribution
    from uniform distributed values on [0, 1]

    in this case: the U-quadratic distribution
    See: https://en.wikipedia.org/wiki/U-quadratic_distribution
    """
    field = np.asarray(field)
    al = 12 / (b - a) ** 3
    be = (a + b) / 2
    ga = (a - b) ** 3 / 8
    y_raw = 3 * field / al + ga
    result = np.zeros_like(y_raw)
    result[y_raw > 0] = y_raw[y_raw > 0] ** (1 / 3)
    result[y_raw < 0] = -((-y_raw[y_raw < 0]) ** (1 / 3))
    return result + be