Source code for fynance.research.ledger
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Persistent experiment ledger.
A :class:`Ledger` turns one-off runs into **cumulative** research: it persists
experiments under a caller-provided ``root``, reloads them, ranks them into a
leaderboard, and tracks the **number of trials** — which it can feed into the
deflated Sharpe ratio so a selected strategy is judged against the multiple
testing it came from. The store lives entirely under ``root`` (the caller's
private repo) — never inside fynance.
"""
# Built-in
from __future__ import annotations
from pathlib import Path
# Third-party
import numpy as np
# Local
from fynance.research.compare import leaderboard
from fynance.research.experiment import Experiment
from fynance.research.guards import deflated_sharpe_ratio
__all__ = ['Ledger']
[docs]
class Ledger:
    """ A persistent, append-only store of experiments under ``root``.

    Parameters
    ----------
    root : str or pathlib.Path
        Directory the experiments live under (created on demand). Each
        experiment is stored at ``<root>/<name>/experiment.json``.

    Examples
    --------
    >>> import tempfile
    >>> from fynance.research import Experiment, Ledger
    >>> d = tempfile.mkdtemp()
    >>> led = Ledger(d)
    >>> _ = led.append(Experiment(name="a", metrics={"sharpe": 1.0}))
    >>> _ = led.append(Experiment(name="b", metrics={"sharpe": 2.0}))
    >>> led.n_trials
    2
    >>> [r["name"] for r in led.leaderboard()]
    ['b', 'a']

    """

    # Annualization factor assumed when an experiment's ``spec`` carries no
    # usable ``period`` (presumably trading days per year -- confirm).
    _DEFAULT_PERIOD = 252.0

    def __init__(self, root: str | Path):
        self.root = Path(root)

    def append(self, experiment: Experiment) -> Path:
        """ Persist ``experiment`` under the ledger root; return its json path.

        The store is **append-only**: appending an experiment whose ``name``
        already exists raises :class:`FileExistsError` rather than silently
        overwriting the prior run. Overwriting would undercount
        :attr:`n_trials` and so deflate the multiple-testing correction fed to
        the deflated Sharpe ratio. Pick a unique ``name`` (e.g. version-suffix
        a re-run) before appending.

        Parameters
        ----------
        experiment : Experiment
            The experiment to persist.

        Returns
        -------
        pathlib.Path
            Path to the written ``experiment.json``.

        Raises
        ------
        FileExistsError
            If an experiment with the same ``name`` is already stored.

        """
        self.root.mkdir(parents=True, exist_ok=True)

        # Refuse to overwrite: every stored run must keep counting as a trial.
        if (self.root / experiment.name / "experiment.json").exists():
            raise FileExistsError(
                f"experiment {experiment.name!r} already in the ledger; the "
                f"store is append-only — use a unique name for a re-run"
            )

        return experiment.save(self.root)

    def load(self) -> list[Experiment]:
        """ Load every stored experiment (sorted by path for determinism).

        Returns
        -------
        list of Experiment
            One experiment per ``<root>/<name>/experiment.json`` file; empty
            when ``root`` does not exist yet.

        """
        if not self.root.exists():
            return []

        return [Experiment.load(p)
                for p in sorted(self.root.glob("*/experiment.json"))]

    @property
    def n_trials(self) -> int:
        """ Number of experiments in the ledger (the multiple-testing count). """
        if not self.root.exists():
            return 0

        # Count the files only; no need to deserialize the experiments.
        return sum(1 for _ in self.root.glob("*/experiment.json"))

    def leaderboard(self, *, sort_by: str = "sharpe",
                    descending: bool = True) -> list[dict[str, float | str]]:
        """ Rank the stored experiments (see :func:`fynance.research.leaderboard`).

        Parameters
        ----------
        sort_by : str
            Metric key to rank on (default ``"sharpe"``).
        descending : bool
            Passed through to :func:`fynance.research.leaderboard`
            (default ``True``).

        Returns
        -------
        list of dict
            The ranking rows produced by :func:`fynance.research.leaderboard`.

        """
        return leaderboard(self.load(), sort_by=sort_by, descending=descending)

    def deflated_sharpe(self, experiment: Experiment,
                        metric: str = "sharpe") -> float:
        """ Deflated Sharpe of ``experiment`` against the ledger's trial count.

        Uses the ledger's :attr:`n_trials` as the number of trials and the
        dispersion of the stored Sharpe metrics as the trial variance, so a
        selected strategy is judged against the multiple testing it came from.

        The stored ``sharpe`` metric is **annualized** (by the ``period`` used
        at run time, recorded in ``spec``), whereas
        :func:`~fynance.research.deflated_sharpe_ratio` expects a
        **per-observation** Sharpe (it scales by ``sqrt(n_obs - 1)``
        internally). This method therefore de-annualizes both the selected
        strategy's Sharpe and the across-trial variance before the call:
        ``sr_obs = sr_annual / sqrt(period)`` and ``var_obs = var_annual /
        period``. Skipping this de-annualization saturates the DSR to ~1
        (a modest per-period edge looks certain), defeating the guard.

        Parameters
        ----------
        experiment : Experiment
            The selected experiment to deflate.
        metric : str
            Metric key holding the (annualized) Sharpe (default ``"sharpe"``).

        Returns
        -------
        float
            Deflated Sharpe ratio in ``[0, 1]``.

        Raises
        ------
        KeyError
            If ``experiment.metrics`` has no ``metric`` entry.

        """
        period = self._period(experiment)

        # Per-observation Sharpe of every stored trial that reports `metric`;
        # each one is de-annualized with its own recorded period.
        sharpes = [float(e.metrics[metric]) / np.sqrt(self._period(e))
                   for e in self.load() if metric in e.metrics]

        # A single trial has no dispersion to estimate: fall back to 1.0.
        sr_variance = float(np.var(sharpes)) if len(sharpes) > 1 else 1.0

        # Explicit None check rather than truthiness: a NumPy array (or a
        # pandas Series) of returns has no unambiguous truth value and would
        # raise ValueError under `if returns:`.
        returns = experiment.series.get("returns") if experiment.series else None
        n_obs = 0 if returns is None else len(returns)

        return deflated_sharpe_ratio(
            float(experiment.metrics[metric]) / np.sqrt(period), n_obs,
            max(self.n_trials, 1), sr_variance=sr_variance,
        )

    @staticmethod
    def _period(experiment: Experiment) -> float:
        """ Annualization factor used to compute ``experiment``'s Sharpe.

        Reads ``experiment.spec["period"]``. A missing, non-numeric,
        non-finite or non-positive value falls back to the default factor.

        Parameters
        ----------
        experiment : Experiment
            Experiment whose ``spec`` may record the ``period``.

        Returns
        -------
        float
            A finite, strictly positive annualization factor.

        """
        period = experiment.spec.get("period") if experiment.spec else None

        try:
            value = float(period)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return Ledger._DEFAULT_PERIOD

        # Reject NaN/inf as well as zero/negative: an infinite period would
        # de-annualize every Sharpe to exactly 0.
        if np.isfinite(value) and value > 0.0:
            return value

        return Ledger._DEFAULT_PERIOD