Source code for fynance.models.loss.sortino
#!/usr/bin/env python3
# coding: utf-8
""" Differentiable Sortino ratio loss for PyTorch training loops. """
from __future__ import annotations
# Third-party packages
import torch
import torch.nn.functional as F
# Local packages
from ._base import MAX_RATIO as _MAX_RATIO
from ._base import BaseLoss
__all__ = ['SortinoLoss']
[docs]
class SortinoLoss(BaseLoss):
    r""" Negative Sortino ratio as a differentiable loss.

    Minimizing this loss penalizes downside returns only, unlike
    :class:`SharpeLoss` which penalizes both tails symmetrically.

    Notes
    -----
    With :math:`r` the ``y_pred`` series, :math:`rf_p = rf / period` the
    per-period risk-free rate and :math:`R_{\max}` the ``MAX_RATIO``
    constant, the loss is defined as:

    .. math::

        d = \sqrt{\mu\left(\text{ReLU}(-(r - rf_p))^2\right)}, \qquad
        f = \frac{\mu(|r - rf_p|)}{R_{\max}} + \varepsilon

    .. math::

        \mathcal{L} = -R_{\max} \tanh\left(
            \frac{\mu(r - rf_p)}{R_{\max} \, \max(d, f)}\right)

    where :math:`d` is the downside deviation and :math:`f` its floor.

    **This is a training proxy** — the value is not comparable to the
    numpy :func:`~fynance.metrics.sortino` evaluation metric.

    The downside deviation is :math:`O(\text{returns})`, so a fixed
    absolute ``eps`` inside the square root would be dimensionally wrong:
    on an all-gains batch (zero downside) the ratio would explode (e.g.
    a loss of ``-100`` for a 1% mean excess return with ``eps=1e-8``).
    The downside is therefore floored with a **returns-scaled** value
    ``|excess|.mean() / MAX_RATIO`` (plus a bare ``eps`` backstop for the
    degenerate all-zero batch). This caps the ratio at roughly
    ``MAX_RATIO`` in the low-downside regime regardless of the return
    scale, keeping the loss finite and bounded while preserving the sign
    convention.

    The ratio is then passed through a **smooth saturating map**,
    ``MAX_RATIO * tanh(ratio / MAX_RATIO)``, instead of a hard clamp. A
    hard clamp pins the loss to a constant on a low-downside batch and so
    kills the gradient in exactly the strong-uptrend regime training still
    wants to push on; ``tanh`` is near-linear for normal-regime ratios
    (leaving their numerics unchanged) yet keeps a residual, non-zero
    gradient when the ratio is large.

    Parameters
    ----------
    rf : float, optional
        Annualized risk-free rate. Default is 0.
    period : int, optional
        Number of periods per year. Default is 252.
    eps : float, optional
        Bare numerical stabilizer added to the returns-scaled downside floor
        as a backstop for the degenerate all-zero batch (see Notes).
        Default is 1e-8.

    Examples
    --------
    >>> import torch
    >>> from fynance.models.loss import SortinoLoss
    >>> returns = torch.tensor([-0.01, 0.02, 0.01, -0.005, 0.03])
    >>> loss_fn = SortinoLoss()
    >>> loss = loss_fn(returns)
    >>> loss.shape
    torch.Size([])

    See Also
    --------
    SharpeLoss, DirectionalAccuracyLoss

    """

    def forward(
        self, y_pred: torch.Tensor, y_true: torch.Tensor | None = None,
    ) -> torch.Tensor:
        """ Compute the negative Sortino ratio.

        Parameters
        ----------
        y_pred : torch.Tensor
            Predicted return series, shape ``(T,)`` or ``(T, M)``. All
            elements are reduced together into a single scalar.
        y_true : torch.Tensor, optional
            Not used; accepted for API compatibility with PyTorch criterions.

        Returns
        -------
        torch.Tensor
            Scalar loss value (negative Sortino ratio proxy).

        Raises
        ------
        TypeError
            If ``y_pred`` is not a :class:`torch.Tensor`.

        """
        self._check_tensor(y_pred)
        excess = y_pred - self._rf_per_period

        # Mean squared shortfall below the per-period risk-free rate.
        downside_sq = torch.mean(F.relu(-excess) ** 2)

        # ``sqrt`` has an unbounded derivative at 0: on a zero-downside batch
        # the clamp below sends a zero upstream gradient, and the backward of
        # ``sqrt`` then evaluates 0 / 0 = NaN (which trips
        # ``torch.autograd.detect_anomaly``). Route the exact-zero case
        # around ``sqrt`` with the "double where" trick; the forward value is
        # unchanged and NaN inputs still propagate (``NaN == 0`` is False).
        no_downside = downside_sq == 0
        safe_sq = torch.where(
            no_downside, torch.ones_like(downside_sq), downside_sq,
        )
        downside = torch.where(
            no_downside, torch.zeros_like(downside_sq), torch.sqrt(safe_sq),
        )

        # Floor the downside relative to the return scale: a fixed absolute
        # eps inside the sqrt is dimensionally wrong for an O(returns)
        # downside and lets the ratio explode on an all-gains batch. Scaling
        # the floor by ``|excess|.mean() / MAX_RATIO`` caps the ratio at
        # ~MAX_RATIO in the low-downside regime (scale-invariantly); ``eps``
        # backstops the degenerate all-zero case.
        floor = excess.abs().mean() / _MAX_RATIO + self.eps
        ratio = excess.mean() / torch.clamp(downside, min=floor)

        # Smooth saturating map instead of a hard clamp: tanh is near-linear
        # for normal-regime ratios (numerics unchanged) but keeps a non-zero
        # gradient when the ratio is large, unlike a clamp that zeroes it.
        return -_MAX_RATIO * torch.tanh(ratio / _MAX_RATIO)