Source code for fynance.features.scale

#!/usr/bin/env python3
# coding: utf-8
# @Author: ArthurBernard
# @Email: arthur.bernard.92@gmail.com
# @Date: 2020-09-11 18:47:27
# @Last modified by: ArthurBernard
# @Last modified time: 2021-03-12 22:16:21

""" Data scaling utilities.

Functions and a fit/transform-style :class:`Scale` object to standardize
or normalize one- and two-dimensional arrays before feeding them to a
machine-learning model.

Both global versions (whole-sample mean/std, min/max) and rolling
versions (computed on a lagged window) are provided. Rolling variants
are lookahead-safe and recommended for time-series ML pipelines where
using a global statistic would leak future information into training
windows.

Main entry points
-----------------
- :func:`standardize` / :func:`roll_standardize` — z-score scaling.
- :func:`normalize` / :func:`roll_normalize` — min-max scaling.
- :class:`Scale` — fit/transform wrapper that stores parameters and
  exposes a :meth:`Scale.revert` inverse.

"""

# Built-in packages

# Third party packages
import numpy as np

from fynance.features.momentums import *

# Local packages
from fynance.features.roll_functions import roll_max, roll_min

__all__ = ["normalize", "roll_normalize", "roll_standardize", "Scale",
           "standardize"]


# TODO :
#     - Use wrapper for axis for scale methods
#     - Use wrapper for axis for standardize and normalize functions
#     - Finish functions or method to scale with moving functions.


# Dispatch table for the rolling moments: each ``kind_moment`` key maps to a
# ``[moving average function, moving standard deviation function]`` pair
# ("s" for simple, "w" for weighted, "e" for exponential).
# NOTE(review): the six functions are presumably brought into scope by the
# wildcard import from ``fynance.features.momentums`` - confirm.
_HANDLER_MOMENTUM = {
    "s": [sma, smstd],
    "w": [wma, wmstd],
    "e": [ema, emstd],
}


def _get_norm_params(X, axis=0):
    """ Compute the parameters of the min-max scale transformation.

    Parameters
    ----------
    X : array_like
        Data used to fit the parameters.
    axis : int, optional
        Axis along which the minimum and maximum are computed. Default
        is 0.

    Returns
    -------
    dict
        ``{"m": minimum, "s": maximum}`` of `X` along `axis`.

    """
    return {"m": np.min(X, axis=axis), "s": np.max(X, axis=axis)}


def _normalize(X, m, s, a, b):
    """ Map `X` linearly from the range ``[m, s]`` to the range ``[a, b]``.

    Parameters
    ----------
    X : np.ndarray or float
        Data to scale.
    m, s : np.ndarray or float
        Respectively the lower and upper bound of the input range.
    a, b : np.ndarray or float
        Respectively the lower and upper bound of the output range.

    Returns
    -------
    np.ndarray or float
        ``(b - a) * (X - m) / (s - m) + a``. No guard is applied against
        ``s == m``.

    """
    width_in = s - m
    width_out = b - a

    return width_out * (X - m) / width_in + a


def _revert_normalize(X, m, s, a, b):
    """ Invert ``_normalize``: map `X` from ``[a, b]`` back to ``[m, s]``.

    Parameters
    ----------
    X : np.ndarray or float
        Scaled data to revert.
    m, s : np.ndarray or float
        Respectively the lower and upper bound of the original range.
    a, b : np.ndarray or float
        Respectively the lower and upper bound of the scaled range.

    Returns
    -------
    np.ndarray or float
        ``(s - m) * (X - a) / (b - a) + m``.

    """
    # Inverting a min-max scaling is the same scaling with the roles of the
    # input bounds (m, s) and of the output bounds (a, b) swapped.
    return _normalize(X, m=a, s=b, a=m, b=s)


def _get_std_params(X, axis=0):
    """ Compute the parameters of the standardized scale transformation.

    Parameters
    ----------
    X : array_like
        Data used to fit the parameters.
    axis : int, optional
        Axis along which the mean and standard deviation are computed.
        Default is 0.

    Returns
    -------
    dict
        ``{"m": mean, "s": standard deviation}`` of `X` along `axis`.

    """
    return {"m": np.mean(X, axis=axis), "s": np.std(X, axis=axis)}


def _standardize(X, m, s, a, b):
    """ Center `X` on `m`, divide by `s`, then multiply by `b` and add `a`.

    Parameters
    ----------
    X : np.ndarray or float
        Data to scale.
    m, s : np.ndarray or float
        Respectively the location and the dispersion to remove from `X`.
    a, b : np.ndarray or float
        Respectively the additional and the multiply factor to apply.

    Returns
    -------
    np.ndarray or float
        ``b * (X - m) / s + a``. No guard is applied against ``s == 0``.

    """
    centered = X - m

    return b * centered / s + a


def _revert_standardize(X, m, s, a, b):
    """ Invert ``_standardize`` with the same parameters.

    Parameters
    ----------
    X : np.ndarray or float
        Scaled data to revert.
    m, s : np.ndarray or float
        Respectively the location and the dispersion of the original data.
    a, b : np.ndarray or float
        Respectively the additional and the multiply factor that were
        applied by the forward transformation.

    Returns
    -------
    np.ndarray or float
        ``s * (X - a) / b + m``.

    """
    # The inverse is the same affine map with (m, s) and (a, b) swapped.
    return _standardize(X, m=a, s=b, a=m, b=s)


def _get_roll_norm_params(X, w=None, axis=0):
    """ Compute the parameters of the rolling min-max scale transformation.

    Windows on which the rolling minimum equals the rolling maximum would
    make ``_normalize`` divide by zero. For these windows the bounds are
    replaced such that the constant value maps to the midpoint of the
    output range.

    Parameters
    ----------
    X : np.ndarray
        Data used to fit the parameters.
    w : int, optional
        Size of the lagged window, forwarded to ``roll_min`` and
        ``roll_max``. Default is None.
    axis : int, optional
        Axis along which the rolling minimum and maximum are computed.
        Default is 0.

    Returns
    -------
    dict
        ``{"m": rolling minimum, "s": rolling maximum}`` with the flat
        windows adjusted as described above.

    """
    m = roll_min(X, w, axis=axis)
    s = roll_max(X, w, axis=axis)

    flat = m == s
    # For a flat window on the value 0, doubling the maximum still gives 0,
    # so that case must be handled separately.
    flat_zero = flat & (s == 0)

    # Flat window on a constant ``v``: use the bounds [0, 2 * v] so that
    # (v - m) / (s - m) == 0.5.
    m[flat] = 0.
    s[flat] = 2 * s[flat]

    # Flat window on 0: use the bounds [-1, 1], which also gives 0.5 and
    # avoids the 0 / 0 division.
    m[flat_zero] = -1.
    s[flat_zero] = 1.

    return {"m": m, "s": s}


def _get_roll_std_params(X, w=None, kind_moment="s", axis=0):
    """ Compute the parameters of the rolling standardized transformation.

    Parameters
    ----------
    X : np.ndarray
        Data used to fit the parameters.
    w : int, optional
        Size of the lagged window, forwarded to the moving functions.
        Default is None.
    kind_moment : str {"s", "w", "e"}, optional
        Key of ``_HANDLER_MOMENTUM`` selecting the pair of moving average
        and moving standard deviation functions. Default is "s".
    axis : int, optional
        Axis along which the moving functions are computed. Default is 0.

    Returns
    -------
    dict
        ``{"m": moving average, "s": moving standard deviation}`` where the
        null standard deviations are replaced by 1.

    """
    mean_func, std_func = _HANDLER_MOMENTUM[kind_moment]
    moving_mean = mean_func(X, w=w, axis=axis)
    moving_std = std_func(X, w=w, axis=axis)

    # A null dispersion would lead to a division by zero when scaling.
    moving_std[moving_std == 0] = 1.

    return {"m": moving_mean, "s": moving_std}


class Scale:
    """ Fit/transform-style scaler for time-series data.

    Wraps the four scaling primitives (``standardize``, ``normalize``,
    ``roll_standardize``, ``roll_normalize``) behind a uniform
    fit / scale / revert API.

    Parameters are fitted at construction (or by an explicit call to
    :meth:`fit`) and reused on subsequent calls - the typical pipeline
    pattern of fitting on a training window and applying the same
    transform to the test window, which avoids leaking test-period
    statistics into training.

    The ``revert`` method inverts the transformation, useful when the
    target of an ML model was scaled and the prediction must be converted
    back to the original units.

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Data to fit the parameters of scale transformation.
    kind : str, optional
        - "std" : Standardized scale transformation (default), see
          :func:`~fynance.features.scale.standardize`.
        - "norm" : Normalized scale transformation, see
          :func:`~fynance.features.scale.normalize`.
        - "raw" : No scale is applied.
        - "roll_std" : Standardized scale transformation, computed with
          rolling mean and standard deviation (see
          :func:`~fynance.features.scale.roll_standardize`).
        - "roll_norm" : Normalized scale transformation, computed with
          rolling minimum and maximum (see
          :func:`~fynance.features.scale.roll_normalize`).
    a, b : float or array_like, optional
        Some scale factors to apply after the transformation. By default
        they are respectively 0 and 1.
    axis : int, optional
        Axis along which to compute the scale parameters. Default is 0.
    **kwargs : keyword arguments for particular functions
        E.g: for rolling function set ``w`` the lagged window (see
        :func:`~fynance.features.scale.roll_normalize`) or for rolling
        standardization set ``kind_moment={"s", "w", "e"}`` (see
        :func:`~fynance.features.scale.roll_standardize`).

    Methods
    -------
    fit
    scale
    revert

    Attributes
    ----------
    func : callable
        The scale function.
    revert_func : callable
        The revert scale function.
    params : dict
        Parameters of the scale transformation.
    axis : int
        The axis along which the scale parameters are computed.
    kind : str
        The kind of scale transformation.

    See Also
    --------
    normalize, standardize, roll_standardize, roll_normalize

    """

    # Scale function for each kind, called as ``func(X, **params)``.
    handle_func = {
        "raw": lambda x, a, b: x,
        "norm": _normalize,
        "std": _standardize,
        "roll_norm": _normalize,
        "roll_std": _standardize,
    }
    # Function computing the parameters for each kind, called by ``fit`` as
    # ``get_params(X, axis=axis, **kwargs)``.
    handle_params = {
        # ``fit`` always forwards ``axis`` and the extra keyword arguments,
        # so the "raw" entry must accept (and ignore) them.
        "raw": lambda x, axis=0, **kwargs: {},
        "norm": _get_norm_params,
        "std": _get_std_params,
        "roll_norm": _get_roll_norm_params,
        "roll_std": _get_roll_std_params,
    }
    # Inverse function for each kind, called as ``revert_func(X, **params)``.
    handle_revert = {
        "raw": lambda x, a, b: x,
        "norm": _revert_normalize,
        "std": _revert_standardize,
        "roll_norm": _revert_normalize,
        "roll_std": _revert_standardize,
    }

    def __init__(self, X, kind="std", a=0., b=1., axis=0, **kwargs):
        """ Initialize the scale object. """
        self.func = self.handle_func[kind]
        self.revert_func = self.handle_revert[kind]
        self.kind = kind
        self.axis = axis
        self.fit(X, kind, a, b, axis, **kwargs)

    def __call__(self, X, axis=None):
        """ Callable method to scale data with fitted parameters.

        Parameters
        ----------
        X : np.ndarray[dtype, ndim=1 or 2]
            Data to scale.
        axis : int, optional
            Axis along which to scale the data. If None (default), the
            fitted ``axis`` attribute is used.

        Returns
        -------
        np.ndarray[dtype, ndim=1 or 2]
            Scaled data.

        """
        return self.scale(X, axis)

    def __repr__(self):
        """ Return string representation. """
        return ("Scale transformation '{}' with the following parameters: {}"
                "".format(self.kind, self.params))

    def fit(self, X, kind=None, a=0., b=1., axis=0, **kwargs):
        """ Compute the parameters of the scale transformation.

        The ``kind``, ``axis``, ``func`` and ``revert_func`` attributes are
        updated to match the fitted parameters, such that :meth:`scale` and
        :meth:`revert` always apply the transformation that was fitted.

        Parameters
        ----------
        X : np.ndarray[dtype, ndim=1 or 2]
            Data to fit the parameters of scale transformation.
        kind : str, optional
            - "std" : Standardized scale transformation, see
              :func:`~fynance.features.scale.standardize`.
            - "norm" : Normalized scale transformation, see
              :func:`~fynance.features.scale.normalize`.
            - "raw" : No scale is applied.
            - "roll_std" : Standardized scale transformation, computed with
              rolling mean and standard deviation (see
              :func:`~fynance.features.scale.roll_standardize`).
            - "roll_norm" : Normalized scale transformation, computed with
              rolling minimum and maximum (see
              :func:`~fynance.features.scale.roll_normalize`).

            If None (default), the current ``kind`` attribute is used.
        a, b : float or array_like, optional
            Some scale factors to apply after the transformation. By
            default they are respectively 0 and 1.
        axis : int, optional
            Axis along which to compute the scale parameters. Default is 0.
            If None, the current ``axis`` attribute is used.
        **kwargs : keyword arguments for particular functions
            E.g: for rolling function set ``w`` the lagged window (see
            :func:`~fynance.features.scale.roll_normalize`) or for rolling
            standardization set ``kind_moment={"s", "w", "e"}`` (see
            :func:`~fynance.features.scale.roll_standardize`).

        Raises
        ------
        KeyError
            If ``kind`` is not one of the supported kinds.

        """
        if kind is None:
            kind = self.kind

        if axis is None:
            axis = self.axis

        # Look everything up before mutating the object, so that an unknown
        # kind leaves the previous state untouched.
        func = self.handle_func[kind]
        revert_func = self.handle_revert[kind]
        params = self.handle_params[kind](X, axis=axis, **kwargs)
        params.update({"a": a, "b": b})

        # Keep the functions and the axis in sync with the fitted parameters;
        # otherwise a re-fit with another kind or axis would apply the old
        # function (or axis) to the new parameters.
        self.kind = kind
        self.axis = axis
        self.func = func
        self.revert_func = revert_func
        self.params = params

    def scale(self, X, axis=None):
        """ Scale the data with the fitted parameters.

        Parameters
        ----------
        X : np.ndarray[dtype, ndim=1 or 2]
            Data to scale.
        axis : int, optional
            Axis along which to scale the data. If None (default), the
            fitted ``axis`` attribute is used.

        Returns
        -------
        np.ndarray[dtype, ndim=1 or 2]
            Scaled data.

        """
        if axis is None:
            axis = self.axis

        if axis == 1:
            # Parameters fitted along axis 1 have one entry per row of X:
            # transpose so that they broadcast, then transpose back.
            return self.func(X.T, **self.params).T

        return self.func(X, **self.params)

    def revert(self, X, axis=None):
        """ Revert the transformation of the scale with the fitted parameters.

        Parameters
        ----------
        X : np.ndarray[dtype, ndim=1 or 2]
            Data to revert the scale.
        axis : int, optional
            Axis along which to revert the data. If None (default), the
            fitted ``axis`` attribute is used.

        Returns
        -------
        np.ndarray[dtype, ndim=1 or 2]
            The revert transformed data.

        """
        if axis is None:
            axis = self.axis

        if axis == 1:
            # Same transposition as in ``scale``.
            return self.revert_func(X.T, **self.params).T

        return self.revert_func(X, **self.params)
def standardize(X, a=0, b=1, axis=0):
    r""" Subtract the mean and divide by the standard deviation.

    Z-score scaling: shifts the data to zero mean and unit variance, then
    multiplies by ``b`` and adds ``a``. The standard preprocessing for ML
    models that assume features on comparable scales (linear regressions,
    SVMs, neural networks). For time-series with regime shifts, prefer
    :func:`roll_standardize` to avoid leaking future statistics.

    .. math::

        Standardize(X) = b \times \frac{X - X_{mean}}{X_{std}} + a

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Data to scale.
    a, b : float or array_like, optional
        Respectively an additional and multiply factor.
    axis : int, optional
        Axis along which to scale the data.

    Returns
    -------
    np.ndarray[dtype, ndim=1 or 2]
        The scaled data. No guard is applied against a null standard
        deviation.

    See Also
    --------
    Scale, normalize, roll_standardize

    """
    mu = np.mean(X, axis=axis)
    sigma = np.std(X, axis=axis)

    if axis != 1:

        return _standardize(X, mu, sigma, a, b)

    # Along axis 1 there is one statistic per row: transpose so that they
    # broadcast, then transpose back.
    return _standardize(X.T, mu, sigma, a, b).T
def roll_standardize(X, w=None, a=0, b=1, axis=0, kind_moment="s"):
    r""" Subtract the rolling mean and divide by the rolling standard dev.

    .. math::

        RollStandardize(X)^w_t = b \times \frac{X_t - RollMean(X)^w_t}
        {RollStd(X)^w_t} + a

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Data to scale.
    w : int, optional
        Size of the lagged window of the moving average/standard deviation,
        must be positive. If ``w is None`` or ``w=0``, then
        ``w=X.shape[axis]``. Default is None.
    a, b : float or array_like, optional
        Respectively an additional and multiply factor.
    axis : int, optional
        Axis along which to scale the data.
    kind_moment : str {"s", "w", "e"}, optional
        - If "s" (default) then compute basic moving averages and standard
          deviations, see :func:`~fynance.features.momentums.sma` and
          :func:`~fynance.features.momentums.smstd`.
        - If "w" then compute the weighted moving averages and standard
          deviations, see :func:`~fynance.features.momentums.wma` and
          :func:`~fynance.features.momentums.wmstd`.
        - If "e" then compute the exponential moving averages and standard
          deviations, see :func:`~fynance.features.momentums.ema` and
          :func:`~fynance.features.momentums.emstd`.

    Returns
    -------
    np.ndarray[dtype, ndim=1 or 2]
        The scaled data.

    See Also
    --------
    Scale, normalize, standardize, roll_normalize

    """
    mean_func, std_func = _HANDLER_MOMENTUM[kind_moment]
    moving_mean = mean_func(X, w, axis=axis)
    # NOTE(review): unlike ``_get_roll_std_params``, null standard
    # deviations are not replaced here, so a flat window presumably leads
    # to a division by zero - confirm whether this is intended.
    moving_std = std_func(X, w, axis=axis)

    if axis != 1:

        return _standardize(X, moving_mean, moving_std, a, b)

    # NOTE(review): if the moving functions return arrays shaped like ``X``,
    # the transposed data no longer lines up with them - verify against
    # ``fynance.features.momentums``.
    return _standardize(X.T, moving_mean, moving_std, a, b).T
def normalize(X, a=0, b=1, axis=0):
    r""" Scale the data between ``a`` and ``b``.

    Subtract the minimum and divide by the difference between the maximum
    and the minimum. Then multiply by ``b`` minus ``a`` and add ``a``.

    .. math::

        Normalize(X) = (b - a) \times \frac{X - X_{min}}{X_{max} - X_{min}}
        + a

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Data to scale.
    a, b : float or array_like, optional
        Respectively the lower and upper bound of the transformation.
    axis : int, optional
        Axis along which to scale the data.

    Returns
    -------
    np.ndarray[dtype, ndim=1 or 2]
        The scaled data. No guard is applied against a maximum equal to
        the minimum.

    See Also
    --------
    Scale, standardize, roll_normalize

    """
    lower = np.min(X, axis=axis)
    upper = np.max(X, axis=axis)

    if axis != 1:

        return _normalize(X, lower, upper, a, b)

    # Along axis 1 there is one bound per row: transpose so that they
    # broadcast, then transpose back.
    return _normalize(X.T, lower, upper, a, b).T
def roll_normalize(X, w=None, a=0, b=1, axis=0):
    r""" Scale the data between ``a`` and ``b``.

    Subtract the rolling minimum and divide by the difference between the
    rolling maximum and the rolling minimum. Then multiply by ``b`` minus
    ``a`` and add ``a``.

    .. math::

        RollNormalize(X)^w_t = (b - a) \times \frac{X_t - RollMin(X)^w_t}
        {RollMax(X)^w_t - RollMin(X)^w_t} + a

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Data to scale.
    w : int, optional
        Size of the lagged window of the rolling minimum/maximum, must be
        positive. If ``w is None`` or ``w=0``, then ``w=X.shape[axis]``.
        Default is None.
    a, b : float or array_like, optional
        Respectively the lower and upper bound of the transformation.
    axis : int, optional
        Axis along which to scale the data.

    Returns
    -------
    np.ndarray[dtype, ndim=1 or 2]
        The scaled data.

    See Also
    --------
    Scale, standardize, normalize, roll_standardize

    """
    lower = roll_min(X, w, axis=axis)
    # NOTE(review): unlike ``_get_roll_norm_params``, windows where the
    # rolling maximum equals the rolling minimum are not adjusted here, so
    # they presumably lead to a division by zero - confirm whether this is
    # intended.
    upper = roll_max(X, w, axis=axis)

    if axis != 1:

        return _normalize(X, lower, upper, a, b)

    # NOTE(review): if the rolling functions return arrays shaped like
    # ``X``, the transposed data no longer lines up with them - verify
    # against ``fynance.features.roll_functions``.
    return _normalize(X.T, lower, upper, a, b).T
# Script entry point: nothing is executed when the module is run directly.
if __name__ == "__main__": pass