#!/usr/bin/env python3
# coding: utf-8
""" Statistical helpers (accuracy, directional accuracy, z-score). """
from __future__ import annotations
# Built-in packages
# Third-party packages
import numpy as np
from numpy.typing import NDArray
# Local packages
from fynance._wrappers import WrapperArray
from fynance.features._metrics_helpers import * # noqa: F401,F403
from fynance.features.metrics_cy import *
# Public names exported by a star-import of this module (``from ... import *``).
__all__ = ['accuracy', 'directional_accuracy', 'percent_positive',
'tail_ratio', 'z_score', 'roll_z_score', 'mad', 'roll_mad']
[docs]
@WrapperArray('axis')
def accuracy(y_true: NDArray, y_pred: NDArray, sign: bool = True, axis: int = 0) -> float:
    r""" Compute the share of predictions that match the true series.

    Notes
    -----
    .. math::

        accuracy = \frac{right}{right + wrong}

    Parameters
    ----------
    y_true : np.ndarray[ndim=1 or 2, dtype]
        Vector of true series.
    y_pred : np.ndarray[ndim=1 or 2, dtype]
        Vector of predicted series.
    sign : bool, optional
        - If True (default), both series are reduced to their sign with
          ``np.sign`` before being compared.
        - Else the raw values are compared for exact equality.
    axis : {0, 1}, optional
        Axis along which the computation is done. Default is 0.

    Returns
    -------
    float or np.ndarray[ndim=1, float]
        Number of matching observations divided by the number of
        observations along `axis`, i.e. a value between 0 and 1.

    Examples
    --------
    >>> y_true = np.array([1., .5, -.5, .8, -.2])
    >>> y_pred = np.array([.5, .2, -.5, .1, .0])
    >>> accuracy(y_true, y_pred)
    0.8
    >>> accuracy(y_true, y_pred, sign=False)
    0.2

    See Also
    --------
    directional_accuracy

    """
    # Optionally compare directions only instead of exact values.
    if sign:
        truth, guess = np.sign(y_true), np.sign(y_pred)
    else:
        truth, guess = y_true, y_pred

    n_hits = np.sum(truth == guess, axis=axis)
    n_obs = truth.shape[axis]

    return n_hits / n_obs
[docs]
@WrapperArray('axis')
def directional_accuracy(
    y_true: NDArray, y_pred: NDArray, axis: int = 0,
) -> float:
    r""" Compute the directional accuracy of a prediction.

    Share of periods in which the sign of the prediction equals the sign of
    the true value. 1.0 means every direction was matched and 0.0 means none
    was. Signs come from ``np.sign``, so a zero only matches another zero.

    Notes
    -----
    .. math::

        directionalAccuracy = \frac{1}{T} \sum_{t=1}^{T}
        \mathbf{1}[\text{sign}(\hat{y}_t) = \text{sign}(y_t)]

    Parameters
    ----------
    y_true : np.ndarray[ndim=1 or 2, dtype]
        Vector of true values (returns or price changes).
    y_pred : np.ndarray[ndim=1 or 2, dtype]
        Vector of predicted values.
    axis : {0, 1}, optional
        Axis along which the computation is done. Default is 0.

    Returns
    -------
    float or np.ndarray[ndim=1, float]
        Directional accuracy between 0 and 1.

    Examples
    --------
    >>> y_true = np.array([1., .5, -.5, .8, -.2])
    >>> y_pred = np.array([.5, .2, -.5, .1, .0])
    >>> directional_accuracy(y_true, y_pred)
    0.8

    See Also
    --------
    accuracy

    """
    true_direction = np.sign(y_true)
    pred_direction = np.sign(y_pred)
    same_direction = true_direction == pred_direction

    # Mean of a boolean mask is the fraction of True entries.
    return np.mean(same_direction, axis=axis)
[docs]
@WrapperArray('dtype', 'axis', 'window')
def z_score(X: NDArray, w: int = 0, kind: str = 's', axis: int = 0, dtype=None) -> NDArray:
    r""" Compute the Z-score of each `X`' series.

    Notes
    -----
    Compute the z-score function for a specific average and standard deviation
    function such that:

    .. math:: z = \frac{X_t - \mu_t}{\sigma_t}

    Where :math:`\mu_t` is the average and :math:`\sigma_t` is the standard
    deviation. Periods where :math:`\sigma_t = 0` use :math:`\sigma_t = 1`
    instead, so a flat window yields ``X_t - \mu_t`` rather than a division
    by zero.

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Series of index, prices or returns.
    w : int, optional
        Size of the lagged window of the moving averages, must be positive. If
        ``w is None`` or ``w=0``, then ``w=X.shape[axis]``. Default is 0.
    kind : {'e', 's', 'w'}
        Matched case-insensitively.

        - If 'e' then use exponential moving average, see
          :func:`~fynance.features.momentums.ema` for details. The window is
          first converted to the coefficient ``1 - 2 / (1 + w)``.
        - If 's' (default) then use simple moving average, see
          :func:`~fynance.features.momentums.sma` for details.
        - If 'w' then use weighted moving average, see
          :func:`~fynance.features.momentums.wma` for details.
    axis : {0, 1}, optional
        Axis along which the computation is done. Default is 0.
    dtype : np.dtype, optional
        The type of the output array. If `dtype` is not given, infer the data
        type from `X` input.

    Returns
    -------
    dtype or np.ndarray[dtype, ndim=1]
        Value of Z-score for each series (last period only).

    Raises
    ------
    KeyError
        If `kind` (lower-cased) is not a key of the moving-average handlers.

    Examples
    --------
    >>> X = np.array([70, 100, 80, 120, 160, 80]).astype(np.float64)
    >>> z_score(X, w=3, kind='e')
    -1.0443574118998766
    >>> z_score(X, w=3)
    -1.224744871391589
    >>> z_score(X.reshape([6, 1]), w=3)
    array([-1.22474487])

    See Also
    --------
    roll_z_score, mdd, calmar, drawdown, sharpe

    """
    # Normalise once so the 'e' test below agrees with the handler lookup;
    # previously kind='E' picked the exponential handlers but skipped the
    # window conversion.
    kind = kind.lower()

    if kind == 'e':
        w = 1 - 2 / (1 + w)  # type: ignore[assignment]

    avg = _handler_ma[kind](X, w)
    std = _handler_mstd[kind](X, w)
    # Avoid division by zero on flat windows.
    std[std == 0.] = 1.
    z = (X - avg) / std

    # Only the most recent period is reported.
    return z[-1]
[docs]
@WrapperArray('dtype', 'axis', 'window')
def roll_z_score(X: NDArray, w: int | None = None, kind: str = 's', axis: int = 0, dtype=None) -> NDArray:
    r""" Compute vector of rolling/moving Z-score function.

    Notes
    -----
    Compute for each observation the z-score function for a specific moving
    average function such that :math:`\forall t \in [1:T]`:

    .. math::

        z_t = \frac{X_t - \mu_t}{\sigma_t}

    Where :math:`\mu_t` is the moving average and :math:`\sigma_t` is the
    moving standard deviation. Periods where :math:`\sigma_t = 0` use
    :math:`\sigma_t = 1` instead, so a flat window yields ``X_t - \mu_t``
    rather than a division by zero.

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Series of index, prices or returns.
    w : int, optional
        Size of the lagged window of the moving averages, must be positive. If
        ``w is None`` or ``w=0``, then ``w=X.shape[axis]``. Default is None.
    kind : {'e', 's', 'w'}
        Matched case-insensitively.

        - If 'e' then use exponential moving average, see
          :func:`~fynance.features.momentums.ema` for details. The window is
          first converted to the coefficient ``1 - 2 / (1 + w)``.
        - If 's' (default) then use simple moving average, see
          :func:`~fynance.features.momentums.sma` for details.
        - If 'w' then use weighted moving average, see
          :func:`~fynance.features.momentums.wma` for details.
    axis : {0, 1}, optional
        Axis along which the computation is done. Default is 0.
    dtype : np.dtype, optional
        The type of the output array. If `dtype` is not given, infer the data
        type from `X` input.

    Returns
    -------
    np.ndarray[dtype, ndim=1 or 2]
        Vector of Z-score at each period.

    Raises
    ------
    KeyError
        If `kind` (lower-cased) is not a key of the moving-average handlers.

    Examples
    --------
    >>> X = np.array([70, 100, 80, 120, 160, 80]).astype(np.float64)
    >>> roll_z_score(X, w=3, kind='e')
    array([ 0.        ,  1.41421356, -0.32444284,  1.30806216,  1.27096675,
           -1.04435741])
    >>> roll_z_score(X, w=3)
    array([ 0.        ,  1.        , -0.26726124,  1.22474487,  1.22474487,
           -1.22474487])

    See Also
    --------
    z_score, roll_mdd, roll_calmar, roll_mad, roll_sharpe

    """
    # Normalise once so the 'e' test below agrees with the handler lookup;
    # previously kind='E' picked the exponential handlers but skipped the
    # window conversion.
    kind = kind.lower()

    if kind == 'e':
        # NOTE(review): `w` is presumably already resolved to an int by the
        # 'window' wrapper when the caller leaves it as None - confirm.
        w = 1 - 2 / (1 + w)  # type: ignore[assignment, operator]

    avg = _handler_ma[kind](X, w)
    std = _handler_mstd[kind](X, w)
    # Avoid division by zero on flat windows.
    std[std == 0.] = 1.
    z = (X - avg) / std

    return z
[docs]
@WrapperArray('axis')
def percent_positive(X: NDArray, axis: int = 0) -> NDArray:
    r""" Share of observations that are strictly greater than zero.

    Often read as a "hit rate": the proportion of periods whose return is
    positive. Zeros are not counted as positive.

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Series of returns.
    axis : {0, 1}, optional
        Axis of computation. Default 0.
        NOTE(review): the body never reads `axis` and always reduces along
        axis 0; presumably the ``'axis'`` wrapper re-orients `X` beforehand -
        confirm against the wrapper.

    Returns
    -------
    float or np.ndarray
        Fraction in ``[0, 1]`` of strictly positive values.

    Examples
    --------
    >>> X = np.array([0.1, -0.2, 0.3, 0.0, 0.4])
    >>> float(percent_positive(X))
    0.6

    """
    is_positive = X > 0

    # Mean of a boolean mask is the fraction of True entries.
    return np.mean(is_positive, axis=0)
[docs]
@WrapperArray('axis')
def tail_ratio(X: NDArray, alpha: float = 0.05, axis: int = 0) -> NDArray:
    r""" Tail ratio of a return series.

    Magnitude of the upper quantile divided by the magnitude of the lower
    quantile, :math:`|q_{1-\alpha}| / |q_{\alpha}|`. A value above 1 means
    the upside tail is larger than the downside tail.

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Series of returns.
    alpha : float, optional
        Tail quantile level. Default 0.05 (95th vs 5th percentile).
    axis : {0, 1}, optional
        Axis of computation. Default 0.
        NOTE(review): the body never reads `axis` and always takes quantiles
        along axis 0; presumably the ``'axis'`` wrapper re-orients `X`
        beforehand - confirm against the wrapper.

    Returns
    -------
    float or np.ndarray
        Tail ratio (0 when the left tail is exactly 0).

    """
    upper_tail = np.abs(np.quantile(X, 1.0 - alpha, axis=0))
    lower_tail = np.abs(np.quantile(X, alpha, axis=0))

    has_lower_tail = lower_tail > 0
    # Substitute a harmless divisor where the lower tail is 0; those entries
    # are overwritten with 0.0 by the outer selection anyway.
    safe_divisor = np.where(has_lower_tail, lower_tail, 1.0)

    return np.where(has_lower_tail, upper_tail / safe_divisor, 0.0)
[docs]
@WrapperArray('dtype')
def mad(X: NDArray, axis: int = 0, dtype=None) -> NDArray:
    """ Compute the Mean Absolute Deviation of each `X`' series.

    Compute the mean of the absolute value of the distance to the mean [6]_.

    Parameters
    ----------
    X : np.ndarray[np.dtype, ndim=1 or 2]
        Time-series of prices, performances or index.
    axis : {0, 1}, optional
        Axis along which the computation is done. Default is 0.
    dtype : np.dtype, optional
        The type of the output array. If `dtype` is not given, infer the data
        type from `X` input. Not read by the body itself; presumably consumed
        by the ``'dtype'`` wrapper.

    Returns
    -------
    dtype or np.ndarray[dtype, ndim=1]
        Values of mean absolute deviation of each series.

    References
    ----------
    .. [6] https://en.wikipedia.org/wiki/Average_absolute_deviation

    Examples
    --------
    >>> X = np.array([70., 100., 90., 110., 150., 80.])
    >>> mad(X)
    20.0
    >>> mad(np.array([[1., 2.], [3., 4.], [5., 6.]]))
    array([1.33333333, 1.33333333])

    See Also
    --------
    roll_mad

    """
    # keepdims keeps the reduced axis as length 1 so the mean broadcasts
    # against X for either axis. The former transpose trick only lined up
    # for axis=1 and failed (or mis-subtracted on square input) for 2-D
    # arrays with the default axis=0.
    center = np.mean(X, axis=axis, keepdims=True)

    return np.mean(np.abs(X - center), axis=axis)
[docs]
@WrapperArray('dtype', 'axis', 'window')
def roll_mad(X: NDArray, w: int | None = None, axis: int = 0, dtype=None) -> NDArray:
    """ Compute rolling Mean Absolute Deviation for each `X`' series.

    Compute the moving average of the absolute value of the distance to the
    moving average [6]_. The computation is delegated to a 1-D or 2-D
    compiled routine selected from the number of dimensions of `X`.

    Parameters
    ----------
    X : np.ndarray[dtype, ndim=1 or 2]
        Time series (price, performance or index).
    w : int, optional
        Size of the lagged window of the rolling function, must be positive. If
        ``w is None`` or ``w=0``, then ``w=X.shape[axis]``. Default is None.
    axis : {0, 1}, optional
        Axis along which the computation is done. Default is 0.
    dtype : np.dtype, optional
        The type of the output array. If `dtype` is not given, infer the data
        type from `X` input.

    Returns
    -------
    np.ndarray[dtype, ndim=1 or 2]
        Series of mean absolute deviation.

    References
    ----------
    .. [6] https://en.wikipedia.org/wiki/Average_absolute_deviation

    Examples
    --------
    >>> X = np.array([70, 100, 90, 110, 150, 80])
    >>> roll_mad(X, dtype=np.float64)
    array([ 0.        , 15.        , 11.11111111, 12.5       , 20.8       ,
           20.        ])
    >>> X = np.array([60, 100, 80, 120, 160, 80]).astype(np.float64)
    >>> roll_mad(X, w=3, dtype=np.float64)
    array([ 0.        , 20.        , 13.33333333, 13.33333333, 26.66666667,
           26.66666667])

    See Also
    --------
    mad

    """
    # Anything that is not exactly two-dimensional goes to the 1-D routine.
    if len(X.shape) != 2:
        rolled = roll_mad_cy_1d(X, w)  # type: ignore[name-defined]
    else:
        rolled = roll_mad_cy_2d(X, w)  # type: ignore[name-defined]

    return np.asarray(rolled)