Source code for fynance.research.ledger

#!/usr/bin/env python
# -*- coding: utf-8 -*-

""" Persistent experiment ledger.

A :class:`Ledger` turns one-off runs into **cumulative** research: it persists
experiments under a caller-provided ``root``, reloads them, ranks them into a
leaderboard, and tracks the **number of trials** — which it can feed into the
deflated Sharpe ratio so a selected strategy is judged against the multiple
testing it came from. The store lives entirely under ``root`` (the caller's
private repo) — never inside fynance.

"""

# Built-in
from __future__ import annotations

from pathlib import Path

# Third-party
import numpy as np

# Local
from fynance.research.compare import leaderboard
from fynance.research.experiment import Experiment
from fynance.research.guards import deflated_sharpe_ratio

# Public API: the only name exported by ``from fynance.research.ledger import *``.
__all__ = ['Ledger']


class Ledger:
    """ A persistent, append-only store of experiments under ``root``.

    Parameters
    ----------
    root : str or pathlib.Path
        Directory the experiments live under (created on demand). Each
        experiment is stored at ``<root>/<name>/experiment.json``.

    Examples
    --------
    >>> import tempfile
    >>> from fynance.research import Experiment, Ledger
    >>> d = tempfile.mkdtemp()
    >>> led = Ledger(d)
    >>> _ = led.append(Experiment(name="a", metrics={"sharpe": 1.0}))
    >>> _ = led.append(Experiment(name="b", metrics={"sharpe": 2.0}))
    >>> led.n_trials
    2
    >>> [r["name"] for r in led.leaderboard()]
    ['b', 'a']

    """

    def __init__(self, root: str | Path):
        # Only the path is recorded here; the directory itself is created
        # lazily by :meth:`append`.
        self.root = Path(root)

    def append(self, experiment: Experiment) -> Path:
        """ Persist ``experiment`` under the ledger root; return its json path.

        The store is **append-only**: appending an experiment whose ``name``
        already exists raises :class:`FileExistsError` rather than silently
        overwriting the prior run. Overwriting would undercount
        :attr:`n_trials` and so deflate the multiple-testing correction fed
        to the deflated Sharpe ratio. Pick a unique ``name`` (e.g.
        version-suffix a re-run) before appending.

        Parameters
        ----------
        experiment : Experiment
            The experiment to persist.

        Returns
        -------
        pathlib.Path
            Path to the written ``experiment.json``.

        Raises
        ------
        FileExistsError
            If an experiment with the same ``name`` is already stored.

        """
        self.root.mkdir(parents=True, exist_ok=True)
        if (self.root / experiment.name / "experiment.json").exists():
            raise FileExistsError(
                f"experiment {experiment.name!r} already in the ledger; the "
                f"store is append-only — use a unique name for a re-run"
            )
        return experiment.save(self.root)

    def load(self) -> list[Experiment]:
        """ Load every stored experiment (sorted by path for determinism).

        Returns
        -------
        list of Experiment
            One entry per ``<root>/<name>/experiment.json``; empty when the
            root directory does not exist yet.

        """
        if not self.root.exists():
            return []
        return [
            Experiment.load(path)
            for path in sorted(self.root.glob("*/experiment.json"))
        ]

    @property
    def n_trials(self) -> int:
        """ Number of experiments in the ledger (the multiple-testing count).

        Counts the ``experiment.json`` files one level below ``root`` without
        parsing them; ``0`` when the root directory does not exist yet.

        """
        if not self.root.exists():
            return 0
        return sum(1 for _ in self.root.glob("*/experiment.json"))

    def leaderboard(
        self,
        *,
        sort_by: str = "sharpe",
        descending: bool = True,
    ) -> list[dict[str, float | str]]:
        """ Rank the stored experiments (see :func:`fynance.research.leaderboard`).

        Parameters
        ----------
        sort_by : str
            Metric key to rank on (default ``"sharpe"``).
        descending : bool
            Forwarded unchanged to the module-level ``leaderboard`` function
            (default ``True``).

        Returns
        -------
        list of dict
            Whatever the module-level ``leaderboard`` returns for the loaded
            experiments.

        """
        # Inside a method body the bare name resolves to the module-level
        # ``leaderboard`` function, not to this method.
        return leaderboard(self.load(), sort_by=sort_by, descending=descending)

    def deflated_sharpe(self, experiment: Experiment, metric: str = "sharpe") -> float:
        """ Deflated Sharpe of ``experiment`` against the ledger's trial count.

        Uses the ledger's :attr:`n_trials` as the number of trials and the
        dispersion of the stored Sharpe metrics as the trial variance, so a
        selected strategy is judged against the multiple testing it came
        from.

        The stored ``sharpe`` metric is **annualized** (by the ``period`` used
        at run time, recorded in ``spec``), whereas
        :func:`~fynance.research.deflated_sharpe_ratio` expects a
        **per-observation** Sharpe (it scales by ``sqrt(n_obs - 1)``
        internally). This method therefore de-annualizes both the selected
        strategy's Sharpe and the across-trial variance before the call:
        ``sr_obs = sr_annual / sqrt(period)`` and
        ``var_obs = var_annual / period``. Skipping this de-annualization
        saturates the DSR to ~1 (a modest per-period edge looks certain),
        defeating the guard.

        Parameters
        ----------
        experiment : Experiment
            The selected experiment to deflate.
        metric : str
            Metric key holding the (annualized) Sharpe (default ``"sharpe"``).

        Returns
        -------
        float
            Deflated Sharpe ratio in ``[0, 1]``.

        Raises
        ------
        KeyError
            If ``experiment.metrics`` has no ``metric`` entry.

        """
        period = self._period(experiment)
        # Each stored Sharpe is de-annualized with its *own* period, so the
        # variance is taken over per-observation values.
        sharpes = [
            float(trial.metrics[metric]) / np.sqrt(self._period(trial))
            for trial in self.load()
            if metric in trial.metrics
        ]
        # A variance needs at least two trials; otherwise fall back to 1.0.
        sr_variance = float(np.var(sharpes)) if len(sharpes) > 1 else 1.0
        return deflated_sharpe_ratio(
            float(experiment.metrics[metric]) / np.sqrt(period),
            self._n_obs(experiment),
            max(self.n_trials, 1),
            sr_variance=sr_variance,
        )

    @staticmethod
    def _n_obs(experiment: Experiment) -> int:
        """ Number of return observations recorded on ``experiment``.

        Returns ``0`` when the experiment has no series or no ``"returns"``
        entry. The entry is compared against ``None`` rather than tested for
        truthiness, so array-likes whose truth value is ambiguous (e.g. a
        NumPy array) are measured instead of raising ``ValueError``.

        """
        series = experiment.series
        if not series:
            return 0
        returns = series.get("returns")
        if returns is None:
            return 0
        return len(returns)

    @staticmethod
    def _period(experiment: Experiment) -> float:
        """ Annualization factor used to compute ``experiment``'s Sharpe.

        Reads ``spec["period"]`` and falls back to ``252.0`` when the spec or
        the entry is missing, cannot be converted to a float, or is not a
        finite, strictly positive number.

        """
        fallback = 252.0
        raw = experiment.spec.get("period") if experiment.spec else None
        try:
            value = float(raw)  # type: ignore[arg-type]
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an int too large for a float.
            return fallback
        # NaN already fails ``value > 0.0``; the finiteness check also rejects
        # +inf, which would drive ``sr / sqrt(period)`` to zero.
        if np.isfinite(value) and value > 0.0:
            return value
        return fallback