Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-02 03:01:07 +06:00)

commit fdffeb819c ("fix style")
parent 83d39df0b1
@@ -290,6 +290,10 @@ _import_structure = {
"models.hubert": ["HUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "HubertConfig"],
"models.ibert": ["IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "IBertConfig"],
"models.imagegpt": ["IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ImageGPTConfig"],
"models.informer": [
"INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
"InformerConfig",
],
"models.jukebox": [
"JUKEBOX_PRETRAINED_CONFIG_ARCHIVE_MAP",
"JukeboxConfig",
@@ -414,10 +418,6 @@ _import_structure = {
"TIME_SERIES_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
"TimeSeriesTransformerConfig",
],
"models.informer": [
"INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
"InformerConfig",
],
"models.timesformer": ["TIMESFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "TimesformerConfig"],
"models.trajectory_transformer": [
"TRAJECTORY_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
@@ -1621,6 +1621,14 @@ else:
"load_tf_weights_in_imagegpt",
]
)
_import_structure["models.informer"].extend(
[
"INFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
"InformerForPrediction",
"InformerModel",
"InformerPreTrainedModel",
]
)
_import_structure["models.jukebox"].extend(
[
"JUKEBOX_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -2275,14 +2283,6 @@ else:
"TimeSeriesTransformerPreTrainedModel",
]
)
_import_structure["models.informer"].extend(
[
"INFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
"InformerForPrediction",
"InformerModel",
"InformerPreTrainedModel",
]
)
_import_structure["models.timesformer"].extend(
[
"TIMESFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -3741,6 +3741,7 @@ if TYPE_CHECKING:
from .models.hubert import HUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, HubertConfig
from .models.ibert import IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, IBertConfig
from .models.imagegpt import IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP, ImageGPTConfig
from .models.informer import INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, InformerConfig
from .models.jukebox import (
JUKEBOX_PRETRAINED_CONFIG_ARCHIVE_MAP,
JukeboxConfig,
@@ -3855,10 +3856,6 @@ if TYPE_CHECKING:
TIME_SERIES_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
TimeSeriesTransformerConfig,
)
from .models.informer import (
INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
InformerConfig,
)
from .models.timesformer import TIMESFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, TimesformerConfig
from .models.trajectory_transformer import (
TRAJECTORY_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -4865,6 +4862,12 @@ if TYPE_CHECKING:
ImageGPTPreTrainedModel,
load_tf_weights_in_imagegpt,
)
from .models.informer import (
INFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
InformerForPrediction,
InformerModel,
InformerPreTrainedModel,
)
from .models.jukebox import (
JUKEBOX_PRETRAINED_MODEL_ARCHIVE_LIST,
JukeboxModel,
@@ -5393,12 +5396,6 @@ if TYPE_CHECKING:
TimeSeriesTransformerModel,
TimeSeriesTransformerPreTrainedModel,
)
from .models.informer import (
INFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
InformerForPrediction,
InformerModel,
InformerPreTrainedModel,
)
from .models.timesformer import (
TIMESFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
TimesformerForVideoClassification,

@@ -90,6 +90,7 @@ from . import (
hubert,
ibert,
imagegpt,
informer,
jukebox,
layoutlm,
layoutlmv2,
@@ -165,7 +166,6 @@ from . import (
tapas,
tapex,
time_series_transformer,
informer,
timesformer,
trajectory_transformer,
transfo_xl,

@@ -93,6 +93,7 @@ CONFIG_MAPPING_NAMES = OrderedDict(
("hubert", "HubertConfig"),
("ibert", "IBertConfig"),
("imagegpt", "ImageGPTConfig"),
("informer", "InformerConfig"),
("jukebox", "JukeboxConfig"),
("layoutlm", "LayoutLMConfig"),
("layoutlmv2", "LayoutLMv2Config"),
@@ -161,7 +162,6 @@ CONFIG_MAPPING_NAMES = OrderedDict(
("table-transformer", "TableTransformerConfig"),
("tapas", "TapasConfig"),
("time_series_transformer", "TimeSeriesTransformerConfig"),
("informer", "InformerConfig"),
("timesformer", "TimesformerConfig"),
("trajectory_transformer", "TrajectoryTransformerConfig"),
("transfo-xl", "TransfoXLConfig"),
@@ -258,6 +258,7 @@ CONFIG_ARCHIVE_MAP_MAPPING_NAMES = OrderedDict(
("hubert", "HUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("ibert", "IBERT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("imagegpt", "IMAGEGPT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("informer", "INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("jukebox", "JUKEBOX_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("layoutlm", "LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("layoutlmv2", "LAYOUTLMV2_PRETRAINED_CONFIG_ARCHIVE_MAP"),
@@ -319,7 +320,6 @@ CONFIG_ARCHIVE_MAP_MAPPING_NAMES = OrderedDict(
("table-transformer", "TABLE_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("tapas", "TAPAS_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("time_series_transformer", "TIME_SERIES_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("informer", "INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("timesformer", "TIMESFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("transfo-xl", "TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("unispeech", "UNISPEECH_PRETRAINED_CONFIG_ARCHIVE_MAP"),
@@ -424,6 +424,7 @@ MODEL_NAMES_MAPPING = OrderedDict(
("hubert", "Hubert"),
("ibert", "I-BERT"),
("imagegpt", "ImageGPT"),
("informer", "Informer"),
("jukebox", "Jukebox"),
("layoutlm", "LayoutLM"),
("layoutlmv2", "LayoutLMv2"),
@@ -500,7 +501,6 @@ MODEL_NAMES_MAPPING = OrderedDict(
("tapas", "TAPAS"),
("tapex", "TAPEX"),
("time_series_transformer", "Time Series Transformer"),
("informer", "Informer"),
("timesformer", "TimeSformer"),
("trajectory_transformer", "Trajectory Transformer"),
("transfo-xl", "Transformer-XL"),

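For context (not part of this commit): the mapping entries above are what the Auto classes read at runtime. A minimal sketch of how the "informer" registration shown above is typically exercised, assuming a build where these entries are present; the keyword value below is illustrative:

    from transformers import AutoConfig

    # "informer" is resolved through the CONFIG_MAPPING_NAMES entry shown above
    config = AutoConfig.for_model("informer", prediction_length=24)
    print(type(config).__name__)  # "InformerConfig"
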
@@ -92,6 +92,7 @@ MODEL_MAPPING_NAMES = OrderedDict(
("hubert", "HubertModel"),
("ibert", "IBertModel"),
("imagegpt", "ImageGPTModel"),
("informer", "InformerModel"),
("jukebox", "JukeboxModel"),
("layoutlm", "LayoutLMModel"),
("layoutlmv2", "LayoutLMv2Model"),
@@ -157,7 +158,6 @@ MODEL_MAPPING_NAMES = OrderedDict(
("table-transformer", "TableTransformerModel"),
("tapas", "TapasModel"),
("time_series_transformer", "TimeSeriesTransformerModel"),
("informer", "InformerModel"),
("timesformer", "TimesformerModel"),
("trajectory_transformer", "TrajectoryTransformerModel"),
("transfo-xl", "TransfoXLModel"),

@@ -43,10 +43,7 @@ else:


if TYPE_CHECKING:
from .configuration_informer import (
INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
InformerConfig,
)
from .configuration_informer import INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, InformerConfig

try:
if not is_torch_available():

@@ -27,13 +27,11 @@ INFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
}



class InformerConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`InformerModel`]. It is used to
instantiate an Informer model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the Time Series
Transformer
This is the configuration class to store the configuration of a [`InformerModel`]. It is used to instantiate an
Informer model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Time Series Transformer
[huggingface/time-series-transformer-tourism-monthly](https://huggingface.co/huggingface/time-series-transformer-tourism-monthly)
architecture.

@@ -136,47 +134,47 @@ class InformerConfig(PretrainedConfig):
}

def __init__(
self,
input_size: int = 1,
prediction_length: Optional[int] = None,
context_length: Optional[int] = None,
distribution_output: str = "student_t",
loss: str = "nll",
lags_sequence: List[int] = None,
scaling: bool = True,
num_dynamic_real_features: int = 0,
num_static_real_features: int = 0,
num_static_categorical_features: int = 0,
num_time_features: int = 0,
cardinality: Optional[List[int]] = None,
embedding_dimension: Optional[List[int]] = None,
encoder_ffn_dim: int = 32,
decoder_ffn_dim: int = 32,
encoder_attention_heads: int = 2,
decoder_attention_heads: int = 2,
encoder_layers: int = 2,
decoder_layers: int = 2,
is_encoder_decoder: bool = True,
activation_function: str = "gelu",
dropout: float = 0.05,
encoder_layerdrop: float = 0.1,
decoder_layerdrop: float = 0.1,
attention_dropout: float = 0.1,
activation_dropout: float = 0.1,
num_parallel_samples: int = 100,
init_std: float = 0.02,
use_cache=True,
# Informer arguments
attn: str = "prob",
factor: int = 5,
distil: bool = True,
**kwargs
self,
input_size: int = 1,
prediction_length: Optional[int] = None,
context_length: Optional[int] = None,
distribution_output: str = "student_t",
loss: str = "nll",
lags_sequence: List[int] = None,
scaling: bool = True,
num_dynamic_real_features: int = 0,
num_static_real_features: int = 0,
num_static_categorical_features: int = 0,
num_time_features: int = 0,
cardinality: Optional[List[int]] = None,
embedding_dimension: Optional[List[int]] = None,
encoder_ffn_dim: int = 32,
decoder_ffn_dim: int = 32,
encoder_attention_heads: int = 2,
decoder_attention_heads: int = 2,
encoder_layers: int = 2,
decoder_layers: int = 2,
is_encoder_decoder: bool = True,
activation_function: str = "gelu",
dropout: float = 0.05,
encoder_layerdrop: float = 0.1,
decoder_layerdrop: float = 0.1,
attention_dropout: float = 0.1,
activation_dropout: float = 0.1,
num_parallel_samples: int = 100,
init_std: float = 0.02,
use_cache=True,
# Informer arguments
attn: str = "prob",
factor: int = 5,
distil: bool = True,
**kwargs
):
# time series specific configuration
self.prediction_length = prediction_length
self.context_length = context_length or prediction_length
self.distribution_output = distribution_output
self.loss = loss # Eli: From vanilla ts transformer
self.loss = loss # Eli: From vanilla ts transformer
self.input_size = input_size
self.num_time_features = num_time_features
self.lags_sequence = lags_sequence

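For illustration only (not part of the diff): the constructor arguments reformatted above can be exercised like this; the values are arbitrary, and the comments flag the Informer-specific arguments named in the signature:

    from transformers import InformerConfig

    config = InformerConfig(
        prediction_length=24,
        context_length=48,
        lags_sequence=[1, 2, 3, 7],
        num_time_features=2,
        attn="prob",   # Informer-specific: ProbSparse ("prob") vs. full attention
        factor=5,      # Informer-specific: ProbSparse sampling factor
        distil=True,   # Informer-specific: convolutional distilling between encoder layers
    )
    print(config.context_length)  # 48; defaults to prediction_length when left unset
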
@@ -17,9 +17,12 @@

import random
from dataclasses import dataclass
from math import sqrt
from typing import Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.distributions import (
AffineTransform,
@@ -37,11 +40,6 @@ from ...modeling_utils import PreTrainedModel
from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
from .configuration_informer import InformerConfig

from math import sqrt
from typing import List, Optional

import numpy as np
import torch.nn.functional as F

logger = logging.get_logger(__name__)

@@ -54,7 +52,6 @@ INFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
]



class AffineTransformed(TransformedDistribution):
def __init__(self, base_distribution: Distribution, loc=None, scale=None, event_dim=0):
self.scale = 1.0 if scale is None else scale
@@ -472,6 +469,7 @@ class Seq2SeqTimeSeriesModelOutput(ModelOutput):
scale: Optional[torch.FloatTensor] = None
static_features: Optional[torch.FloatTensor] = None


# Copied from transformers.models.time_series_transformer.modeling_time_series_transformer
@dataclass
class Seq2SeqTimeSeriesPredictionOutput(ModelOutput):
@@ -540,6 +538,7 @@ class Seq2SeqTimeSeriesPredictionOutput(ModelOutput):
scale: Optional[torch.FloatTensor] = None
static_features: Optional[torch.FloatTensor] = None


# Copied from transformers.models.time_series_transformer.modeling_time_series_transformer
@dataclass
class SampleTimeSeriesPredictionOutput(ModelOutput):
@@ -554,9 +553,7 @@ class TriangularCausalMask:
def __init__(self, B, L, device="cpu"):
mask_shape = [B, 1, L, L]
with torch.no_grad():
self._mask = torch.triu(
torch.ones(mask_shape, dtype=torch.bool), diagonal=1
).to(device)
self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

@property
def mask(self):
@@ -568,9 +565,7 @@ class ProbMask:
def __init__(self, B, H, L, index, scores, device="cpu"):
_mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
_mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
indicator = _mask_ex[
torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
].to(device)
indicator = _mask_ex[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :].to(device)
self._mask = indicator.view(scores.shape).to(device)

@property
@@ -597,7 +592,7 @@ class FullAttention(nn.Module):
def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1. / sqrt(E)
scale = self.scale or 1.0 / sqrt(E)

scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
@@ -673,14 +668,12 @@ class ProbAttention(nn.Module):

attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)

context_in[
torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
] = torch.matmul(attn, V).type_as(context_in)
context_in[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = torch.matmul(
attn, V
).type_as(context_in)
if self.output_attention:
attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
attns[
torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
] = attn
attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
return (context_in, attns)
else:
return (context_in, None)
@@ -708,18 +701,14 @@ class ProbAttention(nn.Module):
# get the context
context = self._get_initial_context(values, L_Q)
# update the context with selected top_k queries
context, attn = self._update_context(
context, values, scores_top, index, L_Q, attn_mask
)
context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

return context.transpose(2, 1).contiguous(), attn


# source: https://github.com/zhouhaoyi/Informer2020/blob/main/models/attn.py
class AttentionLayer(nn.Module):
def __init__(
self, attention, d_model, n_heads, d_keys=None, d_values=None, mix=False
):
def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None, mix=False):
super(AttentionLayer, self).__init__()

d_keys = d_keys or (d_model // n_heads)
@@ -761,13 +750,13 @@ class ConvLayer(nn.Module):
padding=1,
padding_mode="circular",
)
self.norm = nn.BatchNorm1d(c_in) # Eli question: why batchnorm here?
self.norm = nn.BatchNorm1d(c_in) # Eli question: why batchnorm here?
self.activation = nn.ELU()
self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

def forward(self, x):
x = self.downConv(x.permute(0, 2, 1))
x = self.norm(x) # Eli: why? maybe because the impl...
x = self.norm(x) # Eli: why? maybe because the impl...
x = self.activation(x)
x = self.maxPool(x)
x = x.transpose(1, 2)
@@ -830,9 +819,7 @@ class DecoderLayer(nn.Module):
x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
x = self.norm1(x)

x = x + self.dropout(
self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
)
x = x + self.dropout(self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0])

y = x = self.norm2(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
@@ -847,8 +834,9 @@ class InformerEncoder(nn.Module):

self.activation_fn = ACT2FN[config.activation_function]
Attn = ProbAttention if config.attn == "prob" else FullAttention
self.attn_layers = nn.ModuleList([
EncoderLayer(
self.attn_layers = nn.ModuleList(
[
EncoderLayer(
AttentionLayer(
Attn(
mask_flag=False,
@@ -864,8 +852,10 @@ class InformerEncoder(nn.Module):
d_ff=config.encoder_ffn_dim,
dropout=config.attention_dropout,
activation=self.activation_fn,
) for _ in range(config.encoder_layers)
])
)
for _ in range(config.encoder_layers)
]
)

if config.distil is not None:
self.conv_layers = nn.ModuleList([ConvLayer(config.d_model) for _ in range(config.encoder_layers - 1)])
@@ -1000,22 +990,15 @@ class InformerModel(InformerPreTrainedModel):
self, sequence: torch.Tensor, subsequences_length: int, shift: int = 0
) -> torch.Tensor:
"""
Returns lagged subsequences of a given sequence.
Parameters
----------
sequence : Tensor
the sequence from which lagged subsequences should be extracted.
Shape: (N, T, C).
Returns lagged subsequences of a given sequence. Parameters ---------- sequence : Tensor
the sequence from which lagged subsequences should be extracted. Shape: (N, T, C).
subsequences_length : int
length of the subsequences to be extracted.
shift: int
shift the lags by this amount back.
Returns
--------
lagged : Tensor
a tensor of shape (N, S, C, I), where S = subsequences_length and
I = len(indices), containing lagged subsequences. Specifically,
lagged[i, j, :, k] = sequence[i, -indices[k]-S+j, :].
Returns -------- lagged : Tensor
a tensor of shape (N, S, C, I), where S = subsequences_length and I = len(indices), containing lagged
subsequences. Specifically, lagged[i, j, :, k] = sequence[i, -indices[k]-S+j, :].
"""
sequence_length = sequence.shape[1]
indices = [lag - shift for lag in self.config.lags_sequence]
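Because the doc-formatter flattened the docstring above, here is a small standalone sketch (a hypothetical helper, not the method in this file) of the lagged-subsequence extraction it describes: for each lag in `indices`, take the slice of length S that ends `lag` steps before the end of the sequence, then stack the slices on a new trailing axis so that lagged[i, j, :, k] = sequence[i, -indices[k]-S+j, :]:

    import torch

    def lagged_subsequences(sequence: torch.Tensor, indices, subsequences_length: int) -> torch.Tensor:
        # sequence: (N, T, C) -> (N, S, C, I), where S = subsequences_length and I = len(indices)
        S = subsequences_length
        slices = []
        for lag in indices:
            end = -lag if lag > 0 else None
            slices.append(sequence[:, -lag - S : end, :])  # each slice: (N, S, C)
        return torch.stack(slices, dim=-1)

    lagged = lagged_subsequences(torch.randn(4, 100, 3), indices=[0, 1, 7], subsequences_length=10)
    print(lagged.shape)  # torch.Size([4, 10, 3, 3])
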
@@ -1125,24 +1108,24 @@ class InformerModel(InformerPreTrainedModel):
return self.decoder

def forward(
self,
past_values: torch.Tensor,
past_time_features: torch.Tensor,
past_observed_mask: torch.Tensor,
static_categorical_features: torch.Tensor,
static_real_features: torch.Tensor,
future_values: Optional[torch.Tensor] = None,
future_time_features: Optional[torch.Tensor] = None,
decoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
decoder_head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
encoder_outputs: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
output_hidden_states: Optional[bool] = None,
output_attentions: Optional[bool] = None,
use_cache: Optional[bool] = None,
return_dict: Optional[bool] = None,
self,
past_values: torch.Tensor,
past_time_features: torch.Tensor,
past_observed_mask: torch.Tensor,
static_categorical_features: torch.Tensor,
static_real_features: torch.Tensor,
future_values: Optional[torch.Tensor] = None,
future_time_features: Optional[torch.Tensor] = None,
decoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
decoder_head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
encoder_outputs: Optional[List[torch.FloatTensor]] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
output_hidden_states: Optional[bool] = None,
output_attentions: Optional[bool] = None,
use_cache: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Seq2SeqTimeSeriesModelOutput, Tuple]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -1178,7 +1161,7 @@ class InformerModel(InformerPreTrainedModel):
attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,
)

dec_input = transformer_inputs[:, self.config.context_length:, ...]
dec_input = transformer_inputs[:, self.config.context_length :, ...]
decoder_outputs = self.decoder(
inputs_embeds=dec_input,
attention_mask=decoder_attention_mask,
@@ -1462,6 +1445,3 @@ class InformerForPrediction(InformerPreTrainedModel):
(-1, num_parallel_samples, self.config.prediction_length) + self.target_shape,
)
)

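One more illustrative snippet (not from the commit): the one-line rewrites in ProbMask and ProbAttention above all use the same broadcasted advanced-indexing pattern, which selects a per-(batch, head) subset of rows while keeping the last dimension whole; the shapes below are made up:

    import torch

    B, H, L_K, top_k = 2, 4, 6, 3
    scores = torch.randn(B, H, L_K, L_K)
    index = torch.randint(0, L_K, (B, H, top_k))  # positions of the selected queries per (batch, head)

    # Same broadcasted indexing as the reformatted ProbMask / ProbAttention lines above
    selected = scores[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :]
    print(selected.shape)  # torch.Size([2, 4, 3, 6])
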
@@ -31,15 +31,8 @@ TOLERANCE = 1e-4
if is_torch_available():
import torch

from transformers import (
InformerConfig,
InformerForPrediction,
InformerModel,
)
from transformers.models.informer.modeling_informer import (
InformerDecoder,
InformerEncoder,
)
from transformers import InformerConfig, InformerForPrediction, InformerModel
from transformers.models.informer.modeling_informer import InformerDecoder, InformerEncoder


@require_torch
@@ -171,9 +164,7 @@ class InformerModelTester:

@require_torch
class InformerModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(InformerModel, InformerForPrediction) if is_torch_available() else ()
)
all_model_classes = (InformerModel, InformerForPrediction) if is_torch_available() else ()
all_generative_model_classes = (InformerForPrediction,) if is_torch_available() else ()
is_encoder_decoder = True
test_pruning = False
@@ -374,9 +365,7 @@ def prepare_batch(filename="train-batch.pt"):
@slow
class InformerModelIntegrationTests(unittest.TestCase):
def test_inference_no_head(self):
model = InformerModel.from_pretrained("huggingface/time-series-transformer-tourism-monthly").to(
torch_device
)
model = InformerModel.from_pretrained("huggingface/time-series-transformer-tourism-monthly").to(torch_device)
batch = prepare_batch()

with torch.no_grad():
@@ -399,9 +388,9 @@ class InformerModelIntegrationTests(unittest.TestCase):
self.assertTrue(torch.allclose(output[0, :3, :3], expected_slice, atol=TOLERANCE))

def test_inference_head(self):
model = InformerForPrediction.from_pretrained(
"huggingface/time-series-transformer-tourism-monthly"
).to(torch_device)
model = InformerForPrediction.from_pretrained("huggingface/time-series-transformer-tourism-monthly").to(
torch_device
)
batch = prepare_batch("val-batch.pt")
with torch.no_grad():
output = model(
@@ -421,9 +410,9 @@ class InformerModelIntegrationTests(unittest.TestCase):
self.assertTrue(torch.allclose(output[0, :3, :3], expected_slice, atol=TOLERANCE))

def test_seq_to_seq_generation(self):
model = InformerForPrediction.from_pretrained(
"huggingface/time-series-transformer-tourism-monthly"
).to(torch_device)
model = InformerForPrediction.from_pretrained("huggingface/time-series-transformer-tourism-monthly").to(
torch_device
)
batch = prepare_batch("val-batch.pt")
with torch.no_grad():
outputs = model.generate(