Source code for fynance.research.guards

#!/usr/bin/env python
# -*- coding: utf-8 -*-

""" Statistical guardrails against spurious results.

When an agent searches over many strategies, the danger is **overfitting and
data-snooping**: a good in-sample Sharpe may be luck or leakage. These guards make
honest evaluation cheap — a permutation test (is the edge real?) and the
probabilistic / deflated Sharpe ratio (does it survive the number of trials?).

All functions are data-agnostic and operate on the existing maillons / plain
metrics; nothing here reads real data or stores results.

"""

# Built-in
from __future__ import annotations

from typing import Any

# Third-party
import numpy as np
from numpy.typing import NDArray
from scipy.stats import norm

# Local
from fynance.core import PriceSeries

# Public API: the names exported by ``from fynance.research.guards import *``.
__all__ = [
    'permutation_test',
    'probabilistic_sharpe_ratio',
    'deflated_sharpe_ratio',
]

# Euler-Mascheroni constant. ``deflated_sharpe_ratio`` uses it as the mixing
# weight between two normal quantiles when approximating the expected maximum
# of N Gaussians.
_EULER = 0.5772156649015329


def _to_array(data: Any) -> NDArray[np.float64]:
    """ Turn a PriceSeries / array-like into a NumPy array of prices.

    A ``PriceSeries`` is converted through its own ``to_numpy`` method and the
    result is returned as is. Any other input is cast to ``float64`` and
    flattened to one dimension.

    """
    if not isinstance(data, PriceSeries):
        values = np.asarray(data, dtype=np.float64)
        return values.reshape(-1)

    return data.to_numpy()



[docs]
def permutation_test(
    strategy: Any,
    data: Any,
    *,
    metric: str = "sharpe",
    n_permutations: int = 200,
    seed: int = 0,
) -> dict[str, float]:
    """ Permutation test for a spurious edge.

    Runs the strategy on the real series, then on ``n_permutations`` shuffles of
    the asset's log-returns (which destroys any temporal structure). If the
    strategy scores as well on shuffled data as on the real data, its edge is
    not real.

    Each shuffled path is rebuilt from the first price by compounding the
    permuted log-returns, so every path starts at the same level as the real
    series and uses exactly the same set of returns.

    Parameters
    ----------
    strategy : fynance.strategy.Strategy
        The strategy to evaluate.
    data : PriceSeries or array-like
        Price series. Must hold at least two finite, strictly positive prices.
    metric : str
        Metric key from the run summary (default ``"sharpe"``).
    n_permutations : int
        Number of shuffles forming the null distribution (at least 1).
    seed : int
        Seed for the shuffles and the runs.

    Returns
    -------
    dict
        ``observed``, ``p_value``, ``null_mean``, ``null_std``. The p-value is the
        (smoothed) fraction of shuffles scoring at least the observed metric.
        Scores that cannot be ranked (NaN, on either side) are counted against
        the strategy, so an undefined metric can never look significant.

    Raises
    ------
    ValueError
        If ``n_permutations`` is smaller than 1, if fewer than two prices are
        given, or if any price is non-finite or not strictly positive.

    """
    if n_permutations < 1:
        raise ValueError(
            f"n_permutations must be at least 1, got {n_permutations}"
        )

    # Imported here to avoid a runner <-> guards import cycle at module load.
    from fynance.research.runner import run_experiment

    prices = _to_array(data)
    if prices.size < 2:
        raise ValueError("at least two prices are required to form returns")

    # Log-returns are undefined for non-positive or non-finite prices, and a
    # single bad value would contaminate every shuffled path via the cumsum.
    if not np.all(np.isfinite(prices) & (prices > 0.0)):
        raise ValueError("prices must be finite and strictly positive")

    log_ret = np.diff(np.log(prices))
    s0 = float(prices[0])

    observed = float(run_experiment(strategy, prices, name="observed",
                                    seed=seed).metrics[metric])

    rng = np.random.default_rng(seed)
    null = np.empty(n_permutations, dtype=np.float64)
    for i in range(n_permutations):
        shuffled = rng.permutation(log_ret)
        path = s0 * np.exp(np.concatenate([[0.0], np.cumsum(shuffled)]))
        null[i] = run_experiment(strategy, path, name="perm",
                                 seed=seed).metrics[metric]

    # Count every shuffle that is NOT strictly worse than the observed score.
    # For finite values this equals ``null >= observed``; NaN comparisons are
    # always False, so ``>=`` would silently drop them and bias the p-value
    # towards significance.
    n_not_worse = int(np.count_nonzero(~(null < observed)))

    # Smoothed p-value (never exactly 0): (#{not worse} + 1) / (n + 1).
    p_value = float((n_not_worse + 1) / (n_permutations + 1))

    return {
        "observed": observed,
        "p_value": p_value,
        "null_mean": float(null.mean()),
        "null_std": float(null.std()),
    }




[docs]
def probabilistic_sharpe_ratio(
    sr: float,
    n_obs: int,
    *,
    sr_benchmark: float = 0.0,
    skew: float = 0.0,
    kurt: float = 3.0,
) -> float:
    """ Probabilistic Sharpe ratio (PSR).

    The probability that the true Sharpe exceeds ``sr_benchmark`` given the
    estimate ``sr`` from ``n_obs`` observations, correcting for the returns'
    skewness and kurtosis (Bailey & López de Prado).

    The statistic is ``norm.cdf(z)`` with::

        z = (sr - sr_benchmark) * sqrt(n_obs - 1)
            / sqrt(1 - skew * sr + (kurt - 1) / 4 * sr**2)

    Parameters
    ----------
    sr : float
        Observed (non-annualized) Sharpe ratio.
    n_obs : int
        Number of return observations.
    sr_benchmark : float
        Benchmark Sharpe to beat.
    skew : float
        Skewness of the returns.
    kurt : float
        Kurtosis of the returns (3 for a normal distribution).

    Returns
    -------
    float
        PSR in ``[0, 1]``, or ``nan`` when it is undefined: ``n_obs <= 1``, or
        a ``skew`` / ``kurt`` / ``sr`` combination whose variance term
        ``1 - skew * sr + (kurt - 1) / 4 * sr**2`` is not strictly positive.

    """
    if n_obs <= 1:
        return float("nan")

    variance_term = 1.0 - skew * sr + (kurt - 1.0) / 4.0 * sr**2

    # ``not (x > 0)`` also catches NaN. A zero or negative term would otherwise
    # hit a division by zero or the square root of a negative number and
    # produce a spurious 0/1 or a NaN accompanied by RuntimeWarnings.
    if not variance_term > 0.0:
        return float("nan")

    z = (sr - sr_benchmark) * np.sqrt(n_obs - 1) / np.sqrt(variance_term)

    return float(norm.cdf(z))




[docs]
def deflated_sharpe_ratio(
    sr: float,
    n_obs: int,
    n_trials: int,
    *,
    skew: float = 0.0,
    kurt: float = 3.0,
    sr_variance: float = 1.0,
) -> float:
    """ Deflated Sharpe ratio (DSR).

    A probabilistic Sharpe ratio whose benchmark is the **expected maximum**
    Sharpe among ``n_trials`` independent strategies. It answers: how likely
    is the selected edge to be real once the number of configurations tried
    is taken into account?

    Parameters
    ----------
    sr : float
        Observed (non-annualized) Sharpe ratio of the selected strategy.
    n_obs : int
        Number of return observations.
    n_trials : int
        Number of strategy configurations tried. Values below 1 are treated
        as a single trial, for which the benchmark is 0.
    skew, kurt : float
        Skewness / kurtosis of the selected strategy's returns.
    sr_variance : float
        Variance of the Sharpe estimates **across the trials**.

    Returns
    -------
    float
        DSR in ``[0, 1]``. Low values flag a likely overfit selection.

    """
    trials = max(int(n_trials), 1)

    # A single trial involves no selection, so the benchmark stays at zero.
    if trials == 1:
        return probabilistic_sharpe_ratio(
            sr, n_obs, sr_benchmark=0.0, skew=skew, kurt=kurt
        )

    # Approximate expected maximum of ``trials`` i.i.d. standard normals as an
    # Euler-Mascheroni weighted blend of two upper normal quantiles.
    upper_quantile = norm.ppf(1 - 1.0 / trials)
    upper_quantile_e = norm.ppf(1 - 1.0 / (trials * np.e))
    expected_max = ((1 - _EULER) * upper_quantile
                    + _EULER * upper_quantile_e)

    # Rescale from unit variance to the dispersion of Sharpe across trials.
    benchmark = float(np.sqrt(sr_variance) * expected_max)

    return probabilistic_sharpe_ratio(
        sr, n_obs, sr_benchmark=benchmark, skew=skew, kurt=kurt
    )