Source code for AIModels.ClimFormerAttn

import torch
import torch.nn as nn
from typing import Optional
import transformers as tr

class FeatureAttention(nn.Module):
    """
    A simple additive attention over the *feature* dimension.

    Given an input tensor of shape `(batch, seq_len, feature_dim)`, this
    layer learns a set of attention weights for each feature *per time
    step* and returns the element-wise product `values * attn_weights`
    together with the weights themselves. The formulation is deliberately
    lightweight so that it can be plugged in *before* the standard
    Informer embedding/projection logic.
    """

    def __init__(self, feature_dim: int, hidden_dim: Optional[int] = None):
        super().__init__()
        hidden_dim = hidden_dim or feature_dim
        self.proj1 = nn.Linear(feature_dim, hidden_dim)
        self.proj2 = nn.Linear(hidden_dim, feature_dim)
        self.activation = nn.Tanh()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, values: torch.Tensor):
        # values: (B, T, F)
        scores = self.proj2(self.activation(self.proj1(values)))  # (B, T, F)
        attn_weights = self.softmax(scores)                       # (B, T, F)
        return values * attn_weights, attn_weights
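A minimal shape check (not part of the original module) illustrating how the
layer behaves; the tensor sizes below are arbitrary:

    attn = FeatureAttention(feature_dim=7)
    x = torch.randn(2, 48, 7)  # (batch, seq_len, feature_dim)
    weighted, weights = attn(x)
    assert weighted.shape == x.shape
    # softmax acts over the feature dimension, so the weights at each
    # time step sum to 1 across features
    assert torch.allclose(weights.sum(dim=-1), torch.ones(2, 48))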
class ClimFormer(tr.InformerForPrediction):
    """
    InformerForPrediction with *feature-wise* attention preprocessing.

    The attention layer rescales the raw *past* and (optionally) *future*
    value tensors along the feature dimension before they are consumed by
    the Informer architecture. Everything else, including lagged
    subsequence construction, scaling, the encoder/decoder stacks, and the
    distribution head, is left untouched by delegating to `super().forward`.
    """

    def __init__(self, config):
        super().__init__(config)
        # ``config.input_size`` is the number of input variables (features)
        self.feature_attention = FeatureAttention(config.input_size)

    def _apply_feature_attention(self, values: torch.Tensor) -> torch.Tensor:
        weighted_values, _ = self.feature_attention(values)
        return weighted_values

    def forward(
        self,
        past_values: torch.Tensor,
        past_time_features: torch.Tensor,
        past_observed_mask: torch.Tensor,
        static_categorical_features: Optional[torch.Tensor] = None,
        static_real_features: Optional[torch.Tensor] = None,
        future_values: Optional[torch.Tensor] = None,
        future_time_features: Optional[torch.Tensor] = None,
        future_observed_mask: Optional[torch.Tensor] = None,
        **kwargs,
    ):
        # 1) Apply feature-wise attention
        past_values = self._apply_feature_attention(past_values)
        if future_values is not None:
            future_values = self._apply_feature_attention(future_values)

        # 2) Call the original Informer forward pass
        return super().forward(
            past_values=past_values,
            past_time_features=past_time_features,
            past_observed_mask=past_observed_mask,
            static_categorical_features=static_categorical_features,
            static_real_features=static_real_features,
            future_values=future_values,
            future_time_features=future_time_features,
            future_observed_mask=future_observed_mask,
            **kwargs,
        )
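A minimal training-step sketch (not part of the original module). It assumes
the standard Hugging Face `InformerConfig`; the configuration values and
tensor sizes below are illustrative only:

    from transformers import InformerConfig

    config = InformerConfig(
        prediction_length=24,
        context_length=48,
        input_size=7,             # number of input variables (features)
        lags_sequence=[1, 2, 3],  # past window is extended by max(lags)
        num_time_features=4,
    )
    model = ClimFormer(config)

    batch = 2
    # past tensors must cover context_length + max(lags_sequence) steps
    past_len = config.context_length + max(config.lags_sequence)
    outputs = model(
        past_values=torch.randn(batch, past_len, config.input_size),
        past_time_features=torch.randn(batch, past_len, config.num_time_features),
        past_observed_mask=torch.ones(batch, past_len, config.input_size),
        future_values=torch.randn(batch, config.prediction_length, config.input_size),
        future_time_features=torch.randn(
            batch, config.prediction_length, config.num_time_features
        ),
    )
    print(outputs.loss)  # training loss is returned when future_values is given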