#!/usr/bin/env python3
# coding: utf-8
""" Long Short-Term Memory (LSTM) model.
Defines :class:`LSTMCell`, a composable LSTM building block, and
:class:`LongShortTermMemory`, a full LSTM model with output projection.
The internal :class:`_LSTMCell` holds the four LSTM gates (forget,
input/update, candidate, output) and is the common base for both.
The distinction mirrors PyTorch's own ``torch.nn.LSTMCell`` vs
``torch.nn.LSTM``: :class:`LSTMCell` is the raw cell (useful for
composing larger architectures such as TCN or Transformer encoders),
while :class:`LongShortTermMemory` wraps it with an output projection
and training helpers.
Main entry points
-----------------
- :class:`LSTMCell` — composable LSTM cell without output projection.
- :class:`LongShortTermMemory` — LSTM model ready for walk-forward
training via :meth:`~fynance.models._base.BaseNeuralNet.set_optimizer`.
References
----------
.. [1] Hochreiter, S. & Schmidhuber, J. (1997). Long Short-Term Memory.
Neural Computation, 9(8), 1735–1780.
"""
from __future__ import annotations
# Third-party packages
import torch
from torch import nn
# Local packages
from fynance.models._recurrent_base import _OutputLayerMixin, _RecurrentBase
__all__ = ['LSTMCell', 'LongShortTermMemory']
class _LSTMCell(_RecurrentBase):
    """ LSTM cell: four gates without output projection.

    Implements the Long Short-Term Memory forward pass (Hochreiter &
    Schmidhuber, 1997) with forget gate ``G_f``, input gate ``G_i``,
    candidate cell ``C_tild``, and output gate ``G_o``. Returns the
    updated hidden state ``H`` and cell state ``C`` - no output layer.
    Use :class:`LongShortTermMemory` for a complete model with output
    projection and training helpers.

    Parameters
    ----------
    X, y : array-like or int
        - If it's an array-like, respectively inputs and outputs data.
        - If it's an integer, respectively dimension of inputs and outputs.
    drop : float, optional
        Probability of an element to be zeroed.
    x_type, y_type : optional
        Passed through unchanged to the recurrent base initializer.
    bias : bool, optional
        Passed to the recurrent base initializer and to the four gate
        layers (``W_f``, ``W_i``, ``W_c``, ``W_o``). Default is True.
    hidden_activation, memory_activation : torch.nn.Module, optional
        Activation functions for respectively hidden and memory state,
        default both are Tanh function.
    hidden_state_size, memory_state_size : int, optional
        Size of respectively hidden and memory states. Default hidden
        state is the same size as input; default memory state is the
        same size as hidden state.
    forget_activation, update_activation, output_activation :
    torch.nn.Module, optional
        Activation functions for respectively forget, update and output
        gate, default are Sigmoid function for all three.

    Attributes
    ----------
    C : int
        Size of the memory (cell) state.
    W_f, W_i, W_o, W_c : torch.nn.Linear
        Respectively forget, update and output gate weights and weight to
        compute the candidate value for cell memory.
    f_f, f_i, f_o, f_c : torch.nn.Module
        Respectively activation function for forget, update and output gate
        and activation function to compute the candidate value for cell
        memory.
    f_h : torch.nn.Module
        Activation applied to the cell state before the output gate.

    Notes
    -----
    Every gate layer maps ``N + H`` input features to ``C`` output
    features, so the hidden state returned by :meth:`forward` has the
    memory-state width ``C``. When ``memory_state_size`` differs from
    ``hidden_state_size`` the returned hidden state therefore does not
    have the width expected by the next call to the same cell.

    See Also
    --------
    LongShortTermMemory,
    fynance.models.gru._GRUCell

    """

    def __init__(
        self, X, y=None, drop=None, x_type=None, y_type=None, bias=True,
        hidden_activation=nn.Tanh, hidden_state_size=None,
        memory_activation=nn.Tanh, memory_state_size=None,
        forget_activation=nn.Sigmoid, update_activation=nn.Sigmoid,
        output_activation=nn.Sigmoid,
    ):
        _RecurrentBase.__init__(
            self,
            X,
            y,
            drop=drop,
            x_type=x_type,
            y_type=y_type,
            bias=bias,
            hidden_activation=hidden_activation,
            hidden_state_size=hidden_state_size,
        )

        # ``self.N`` and ``self.H`` are read below; presumably they are set
        # by the base initializer (input and hidden sizes) - TODO confirm.
        self.C = self.H if memory_state_size is None else memory_state_size

        # All four gates read the concatenation [X, H].
        gate_input_size = self.N + self.H

        # The ``bias`` flag is now honoured by the gate layers as well;
        # previously it only reached the base class and the gates always
        # carried a bias term. With the default ``bias=True`` nothing
        # changes. Creation order is kept so that parameter registration
        # and random initialisation stay the same.

        # Forget gate
        self.W_f = nn.Linear(gate_input_size, self.C, bias=bias)
        self.f_f = forget_activation()

        # Update gate
        self.W_i = nn.Linear(gate_input_size, self.C, bias=bias)
        self.f_i = update_activation()

        # Candidate value
        self.W_c = nn.Linear(gate_input_size, self.C, bias=bias)
        self.f_c = memory_activation()

        # Output gate
        self.W_o = nn.Linear(gate_input_size, self.C, bias=bias)
        self.f_o = output_activation()

        # Hidden activation (applied to cell state before output gate)
        self.f_h = hidden_activation()

    def forward(self, X, H, C):
        """ Compute one LSTM step.

        Parameters
        ----------
        X, H, C : torch.Tensor
            Respectively input data, hidden state and memory state.
            ``X`` and ``H`` are concatenated along dimension 1.

        Returns
        -------
        torch.Tensor
            Updated hidden state.
        torch.Tensor
            Updated memory state.

        """
        X_H = torch.cat([X, H], dim=1)

        # ``self.drop`` is called separately for each gate rather than
        # once on the shared input.

        # Forget gate
        G_f = self.f_f(self.W_f(self.drop(X_H)))

        # Candidate value
        C_tild = self.f_c(self.W_c(self.drop(X_H)))

        # Update gate
        G_i = self.f_i(self.W_i(self.drop(X_H)))

        # New memory: keep part of the old state, add gated candidate.
        C = G_f * C + G_i * C_tild

        # Output gate
        G_o = self.f_o(self.W_o(self.drop(X_H)))

        # New hidden state: gated activation of the updated memory.
        H = G_o * self.f_h(C)

        return H, C
class LSTMCell(_LSTMCell):
    """ Public LSTM cell meant to be embedded in larger models.

    Provides the four LSTM gates (forget, input, candidate, output) and
    nothing else: there is no output projection layer. It is intended as
    a component of bigger architectures (TCN, Transformers,
    encoder-decoders). When a standalone trainable model with an output
    projection is needed, use :class:`LongShortTermMemory` instead.

    Parameters
    ----------
    X : int or array-like
        Input dimension (int) or input data. When passing an int, ``y``
        may be omitted.
    y : array-like or int, optional
        Output data or output dimension. Not required when using the
        cell as a building block.
    hidden_state_size : int, optional
        Size of the hidden state. Defaults to the input size.
    memory_state_size : int, optional
        Size of the cell state. Defaults to hidden state size.
    drop : float, optional
        Dropout probability applied before each gate.
    hidden_activation, memory_activation : torch.nn.Module, optional
        Activations for hidden and cell state (default: Tanh for both).
    forget_activation, update_activation, output_activation :
    torch.nn.Module, optional
        Gate activations (default: Sigmoid for all three).

    Examples
    --------
    >>> import torch
    >>> from fynance.models.lstm import LSTMCell
    >>> cell = LSTMCell(8, hidden_state_size=16)
    >>> H = torch.zeros(4, 16)
    >>> C = torch.zeros(4, 16)
    >>> X = torch.randn(4, 8)
    >>> H_new, C_new = cell(X, H, C)
    >>> H_new.shape
    torch.Size([4, 16])

    See Also
    --------
    LongShortTermMemory : full model with output projection and training.
    fynance.models.gru.GRUCell : GRU variant.

    """

    # Single source for the message raised by both disabled helpers.
    _NO_OUTPUT_PROJECTION_MSG = (
        "LSTMCell is a composable building block with no output projection. "
        "Use LongShortTermMemory for a standalone trainable model."
    )

    def train_on(self, *args, **kwargs):
        """ Refuse to train: the bare cell has no output projection.

        Raises
        ------
        NotImplementedError
            Always, whatever the arguments.

        """
        raise NotImplementedError(self._NO_OUTPUT_PROJECTION_MSG)

    def predict(self, *args, **kwargs):
        """ Refuse to predict: the bare cell has no output projection.

        Raises
        ------
        NotImplementedError
            Always, whatever the arguments.

        """
        raise NotImplementedError(self._NO_OUTPUT_PROJECTION_MSG)
class LongShortTermMemory(_OutputLayerMixin, LSTMCell):
    """ Long Short-Term Memory neural network.

    Complete LSTM model: the four-gate cell of :class:`_LSTMCell`
    followed by a forward output projection. Hidden state ``H`` and cell
    state ``C`` are passed from one step to the next by the caller, which
    lets the model carry information across many time steps without the
    vanishing-gradient pathology that limits
    :class:`~fynance.models.rnn.RecurrentNeuralNetwork`. Suited to
    sequence modeling tasks whose dependencies span dozens of steps
    (intraday return series, multi-day momentum signals, regime
    detection).

    Parameters
    ----------
    X, y : array-like or int
        - If it's an array-like, respectively inputs and outputs data.
        - If it's an integer, respectively dimension of inputs and outputs.
    drop : float, optional
        Probability of an element to be zeroed.
    x_type, y_type : optional
        Passed through unchanged to :class:`LSTMCell`.
    bias : bool, optional
        Passed through unchanged to :class:`LSTMCell`. Default is True.
    forward_activation : torch.nn.Module, optional
        Activation functions, default is Softmax.
    hidden_activation, memory_activation : torch.nn.Module, optional
        Activation functions for respectively hidden and memory state,
        default both are Tanh function.
    hidden_state_size, memory_state_size : int, optional
        Size of respectively hidden and memory states. Default hidden
        state is the same size as input; default memory state is the
        same size as hidden state.
    forget_activation, update_activation, output_activation :
    torch.nn.Module, optional
        Activation functions for respectively forget, update and output
        gate, default are Sigmoid function for all three.

    Attributes
    ----------
    criterion : torch.nn.modules.loss
        A loss function.
    optimizer : torch.optim
        An optimizer algorithm.
    W_f, W_i, W_o, W_c, W_y : torch.nn.Linear
        Respectively forget, update and output gate weights, weight to
        compute the candidate value for cell memory and forward weight.
    f_f, f_i, f_o, f_c, f_y : torch.nn.Module
        Respectively activation function for forget, update and output gate,
        activation function to compute the candidate value for cell memory
        and forward activation function.

    See Also
    --------
    fynance.models.rnn.RecurrentNeuralNetwork,
    fynance.models.gru.GatedRecurrentUnit

    """

    def __init__(
        self, X, y, drop=None, x_type=None, y_type=None, bias=True,
        forward_activation=nn.Softmax, hidden_activation=nn.Tanh,
        hidden_state_size=None, memory_activation=nn.Tanh,
        memory_state_size=None, forget_activation=nn.Sigmoid,
        update_activation=nn.Sigmoid, output_activation=nn.Sigmoid,
    ):
        # Everything except ``forward_activation`` configures the cell.
        cell_options = {
            'drop': drop,
            'x_type': x_type,
            'y_type': y_type,
            'bias': bias,
            'hidden_activation': hidden_activation,
            'hidden_state_size': hidden_state_size,
            'memory_activation': memory_activation,
            'memory_state_size': memory_state_size,
            'forget_activation': forget_activation,
            'update_activation': update_activation,
            'output_activation': output_activation,
        }

        # Both initializers are called explicitly, cell first and output
        # layer second, rather than relying on cooperative ``super()``.
        LSTMCell.__init__(self, X, y, **cell_options)
        _OutputLayerMixin.__init__(
            self,
            forward_activation=forward_activation,
        )

    def forward(self, X, H, C):
        """ Run one LSTM step and project the hidden state to an output.

        Parameters
        ----------
        X, H, C : torch.Tensor
            Respectively input data, hidden state and memory state.

        Returns
        -------
        torch.Tensor
            Output data.
        torch.Tensor
            Hidden state.
        torch.Tensor
            Memory state.

        """
        # Cell step: updated hidden and memory states.
        hidden, memory = super().forward(X, H, C)

        # Output projection of the (dropped-out) hidden state.
        projected = self.W_y(self.drop(hidden))
        output = self.f_y(projected)

        return output, hidden, memory

    @torch.enable_grad()
    def train_on(
        self,
        X: torch.Tensor,
        y: torch.Tensor,
        H: torch.Tensor,
        C: torch.Tensor,
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """ Perform one optimization step on the given data.

        Gradients are reset, the forward pass and the loss are computed,
        the loss is back-propagated and the optimizer is stepped. The
        learning-rate scheduler is stepped too when one is set.

        Parameters
        ----------
        X, y, H, C : torch.Tensor
            Respectively inputs, outputs, states and cell memory to train
            model.

        Returns
        -------
        torch.Tensor
            Loss value returned by ``self.criterion``.
        torch.Tensor
            Updated states of the model, detached from the graph.
        torch.Tensor
            Cell memory of the model, detached from the graph.

        """
        self.optimizer.zero_grad()

        prediction, hidden, memory = self(X, H, C)
        loss = self.criterion(prediction, y)

        loss.backward()
        self.optimizer.step()

        if self.lr_scheduler:
            self.lr_scheduler.step()

        # States are detached so the next step does not back-propagate
        # through this one.
        return loss, hidden.detach(), memory.detach()

    @torch.no_grad()
    def predict(
        self,
        X: torch.Tensor,
        H: torch.Tensor,
        C: torch.Tensor,
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """ Compute model outputs with gradient tracking disabled.

        The train/eval mode of the module is left untouched by this
        method.

        Parameters
        ----------
        X : torch.Tensor
            Inputs to compute prediction.
        H : torch.Tensor
            States of the model.
        C : torch.Tensor
            Cell memory of the model.

        Returns
        -------
        torch.Tensor
            Outputs prediction.
        torch.Tensor
            Updated states of the model.
        torch.Tensor
            Cell memory of the model.

        """
        prediction, hidden, memory = self(X, H, C)

        return prediction.detach(), hidden.detach(), memory.detach()