Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 02:02:21 +06:00
Rename add_start_docstrings_to_callable (#8120)
This commit is contained in:
parent 6241c873cd
commit 378142afdf
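The change is mechanical: the decorator add_start_docstrings_to_callable in transformers.file_utils becomes add_start_docstrings_to_model_forward, at its definition, at every import, and at every forward method it decorates. A minimal before/after sketch of the pattern repeated throughout this diff (the class name MyModel and the surrounding context are illustrative placeholders, not taken from any one file):

# Before: the old decorator name.
from transformers.file_utils import add_start_docstrings_to_callable

class MyModel(BertPreTrainedModel):
    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
    def forward(self, input_ids=None):
        ...

# After: the same decorator under its new, more descriptive name.
from transformers.file_utils import add_start_docstrings_to_model_forward

class MyModel(BertPreTrainedModel):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
    def forward(self, input_ids=None):
        ...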
@@ -20,7 +20,7 @@ import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss, MSELoss

-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_albert import (
     ALBERT_INPUTS_DOCSTRING,
     ALBERT_START_DOCSTRING,
@@ -87,7 +87,7 @@ class AlbertModelWithPabee(AlbertModel):
         message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
         print(message)

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -230,7 +230,7 @@ class AlbertForSequenceClassificationWithPabee(AlbertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,

@@ -22,7 +22,7 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss

-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_bert import (
     BERT_INPUTS_DOCSTRING,
     BERT_START_DOCSTRING,
@@ -92,7 +92,7 @@ class BertModelWithPabee(BertModel):
         message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***"
         print(message)

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -254,7 +254,7 @@ class BertForSequenceClassificationWithPabee(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,

@@ -2,7 +2,7 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss

-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_bert import (
     BERT_INPUTS_DOCSTRING,
     BERT_START_DOCSTRING,
@@ -134,7 +134,7 @@ class DeeBertModel(BertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -288,7 +288,7 @@ class DeeBertForSequenceClassification(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,

@@ -4,7 +4,7 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss, MSELoss

 from transformers.configuration_roberta import RobertaConfig
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_roberta import ROBERTA_INPUTS_DOCSTRING, ROBERTA_START_DOCSTRING, RobertaEmbeddings

 from .modeling_highway_bert import BertPreTrainedModel, DeeBertModel, HighwayException, entropy
@@ -45,7 +45,7 @@ class DeeRobertaForSequenceClassification(BertPreTrainedModel):
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
         self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,

@@ -28,7 +28,7 @@ from torch.nn import CrossEntropyLoss, MSELoss

 from emmental import MaskedBertConfig
 from emmental.modules import MaskedLinear
-from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable
+from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_bert import ACT2FN, BertLayerNorm, load_tf_weights_in_bert
 from transformers.modeling_utils import PreTrainedModel, prune_linear_layer

@@ -498,7 +498,7 @@ class MaskedBertModel(MaskedBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -671,7 +671,7 @@ class MaskedBertForSequenceClassification(MaskedBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -756,7 +756,7 @@ class MaskedBertForMultipleChoice(MaskedBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -846,7 +846,7 @@ class MaskedBertForTokenClassification(MaskedBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
@@ -932,7 +932,7 @@ class MaskedBertForQuestionAnswering(MaskedBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,

@@ -425,7 +425,7 @@ def add_start_docstrings(*docstr):
     return docstring_decorator


-def add_start_docstrings_to_callable(*docstr):
+def add_start_docstrings_to_model_forward(*docstr):
     def docstring_decorator(fn):
         class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0])
         intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name)

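The hunk above renames the decorator at its definition, but the diff context cuts off after the intro line. For reference, a self-contained sketch of how such a docstring decorator plausibly completes — the body after intro is an assumption inferred from the visible lines, not shown in this commit:

def add_start_docstrings_to_model_forward(*docstr):
    def docstring_decorator(fn):
        class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0])
        intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name)
        # Assumed continuation: prepend the intro sentence to the function's
        # existing docstring, then append the fragments passed to the decorator.
        fn.__doc__ = intro + (fn.__doc__ or "") + "".join(docstr)
        return fn

    return docstring_decorator
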
@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -631,7 +631,7 @@ class AlbertModel(AlbertPreTrainedModel):
             inner_group_idx = int(layer - group_idx * self.config.inner_group_num)
             self.encoder.albert_layer_groups[group_idx].albert_layers[inner_group_idx].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -727,7 +727,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
     def get_input_embeddings(self):
         return self.albert.embeddings.word_embeddings

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=AlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -879,7 +879,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
     def get_input_embeddings(self):
         return self.albert.embeddings.word_embeddings

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -967,7 +967,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1055,7 +1055,7 @@ class AlbertForTokenClassification(AlbertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1143,7 +1143,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1242,7 +1242,7 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",

@@ -30,7 +30,7 @@ from .file_utils import (
     add_code_sample_docstrings,
     add_end_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -846,7 +846,7 @@ class BartModel(PretrainedBartModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/bart-large",
@@ -981,7 +981,7 @@ class BartForConditionalGeneration(PretrainedBartModel):
         new_bias = torch.cat([self.final_logits_bias, extra_bias], dim=1)
         self.register_buffer("final_logits_bias", new_bias)

-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     @add_end_docstrings(BART_GENERATION_EXAMPLE)
     def forward(
@@ -1147,7 +1147,7 @@ class BartForSequenceClassification(PretrainedBartModel):
         self.model._init_weights(self.classification_head.dense)
         self.model._init_weights(self.classification_head.out_proj)

-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/bart-large",
@@ -1234,7 +1234,7 @@ class BartForQuestionAnswering(PretrainedBartModel):

         self.model._init_weights(self.qa_outputs)

-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/bart-large",

@@ -33,7 +33,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -748,7 +748,7 @@ class BertModel(BertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -870,7 +870,7 @@ class BertForPreTraining(BertPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=BertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -983,7 +983,7 @@ class BertLMHeadModel(BertPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1103,7 +1103,7 @@ class BertForMaskedLM(BertPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1206,7 +1206,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=NextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1300,7 +1300,7 @@ class BertForSequenceClassification(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1384,7 +1384,7 @@ class BertForMultipleChoice(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1479,7 +1479,7 @@ class BertForTokenClassification(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",
@@ -1569,7 +1569,7 @@ class BertForQuestionAnswering(BertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-uncased",

@@ -24,7 +24,7 @@ from .configuration_bert_generation import BertGenerationConfig
 from .file_utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_bert import BertEncoder
@@ -293,7 +293,7 @@ class BertGenerationEncoder(BertGenerationPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/bert_for_seq_generation_L-24_bbc_encoder",
@@ -421,7 +421,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,

@@ -24,7 +24,7 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss

 from .configuration_ctrl import CTRLConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from .modeling_utils import Conv1D, PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
 from .utils import logging
@@ -349,7 +349,7 @@ class CTRLModel(CTRLPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.h[layer].multi_head_attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="ctrl",
@@ -521,7 +521,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):

         return {"input_ids": input_ids, "past_key_values": past, "use_cache": kwargs["use_cache"]}

-    @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="ctrl",

@@ -24,7 +24,7 @@ from torch.nn import CrossEntropyLoss

 from .activations import ACT2FN
 from .configuration_deberta import DebertaConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutput, SequenceClassifierOutput
 from .modeling_utils import PreTrainedModel
 from .utils import logging
@@ -858,7 +858,7 @@ class DebertaModel(DebertaPreTrainedModel):
         """
         raise NotImplementedError("The prune function is not implemented in DeBERTa model.")

-    @add_start_docstrings_to_callable(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="microsoft/deberta-base",
@@ -976,7 +976,7 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.deberta.set_input_embeddings(new_embeddings)

-    @add_start_docstrings_to_callable(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="microsoft/deberta-base",

@@ -32,7 +32,7 @@ from .configuration_distilbert import DistilBertConfig
 from .file_utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -436,7 +436,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.transformer.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -509,7 +509,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
     def get_output_embeddings(self):
         return self.vocab_projector

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -595,7 +595,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -676,7 +676,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -772,7 +772,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -856,7 +856,9 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
+    )
     @replace_return_docstrings(output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,

@@ -22,7 +22,12 @@ import torch
 from torch import Tensor, nn

 from .configuration_dpr import DPRConfig
-from .file_utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import (
+    ModelOutput,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    replace_return_docstrings,
+)
 from .modeling_bert import BertModel
 from .modeling_outputs import BaseModelOutputWithPooling
 from .modeling_utils import PreTrainedModel
@@ -431,7 +436,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
         self.ctx_encoder = DPREncoder(config)
         self.init_weights()

-    @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -509,7 +514,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
         self.question_encoder = DPREncoder(config)
         self.init_weights()

-    @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -586,7 +591,7 @@ class DPRReader(DPRPretrainedReader):
         self.span_predictor = DPRSpanPredictor(config)
         self.init_weights()

-    @add_start_docstrings_to_callable(DPR_READER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(DPR_READER_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DPRReaderOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,

@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -693,7 +693,7 @@ class ElectraModel(ElectraPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -791,7 +791,7 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -873,7 +873,7 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
         self.discriminator_predictions = ElectraDiscriminatorPredictions(config)
         self.init_weights()

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=ElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -971,7 +971,7 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
     def get_output_embeddings(self):
         return self.generator_lm_head

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1060,7 +1060,7 @@ class ElectraForTokenClassification(ElectraPreTrainedModel):
         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
         self.init_weights()

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1147,7 +1147,7 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1248,7 +1248,7 @@ class ElectraForMultipleChoice(ElectraPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",

@@ -19,7 +19,7 @@ from typing import Optional

 from .configuration_encoder_decoder import EncoderDecoderConfig
 from .configuration_utils import PretrainedConfig
-from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings
 from .modeling_outputs import Seq2SeqLMOutput
 from .modeling_utils import PreTrainedModel
 from .utils import logging
@@ -335,7 +335,7 @@ class EncoderDecoderModel(PreTrainedModel):
         config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs)
         return cls(encoder=encoder, decoder=decoder, config=config)

-    @add_start_docstrings_to_callable(ENCODER_DECODER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(ENCODER_DECODER_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,

@@ -21,7 +21,7 @@ import torch
 from torch.nn import functional as F

 from .configuration_flaubert import FlaubertConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutput
 from .modeling_xlm import (
     XLMForMultipleChoice,
@@ -140,7 +140,7 @@ class FlaubertModel(XLMModel):
         self.layerdrop = getattr(config, "layerdrop", 0.0)
         self.pre_norm = getattr(config, "pre_norm", False)

-    @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="flaubert/flaubert_base_cased",

@@ -43,7 +43,7 @@ from .file_utils import (
     add_code_sample_docstrings,
     add_end_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPast, Seq2SeqLMOutput, Seq2SeqModelOutput
@@ -899,7 +899,7 @@ class FSMTModel(PretrainedFSMTModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(FSMT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="facebook/wmt19-ru-en",
@@ -1039,7 +1039,7 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel):

         return new_embeddings

-    @add_start_docstrings_to_callable(FSMT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     @add_end_docstrings(FSMT_GENERATION_EXAMPLE)
     def forward(

@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -906,7 +906,7 @@ class FunnelBaseModel(FunnelPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.embeddings.word_embeddings = new_embeddings

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -983,7 +983,7 @@ class FunnelModel(FunnelPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.embeddings.word_embeddings = new_embeddings

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1082,7 +1082,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
         self.discriminator_predictions = FunnelDiscriminatorPredictions(config)
         self.init_weights()

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=FunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1167,7 +1167,7 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1240,7 +1240,7 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
         self.classifier = FunnelClassificationHead(config, config.num_labels)
         self.init_weights()

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1317,7 +1317,7 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel):
         self.classifier = FunnelClassificationHead(config, 1)
         self.init_weights()

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1403,7 +1403,7 @@ class FunnelForTokenClassification(FunnelPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1485,7 +1485,7 @@ class FunnelForQuestionAnswering(FunnelPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",

@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
@@ -502,7 +502,7 @@ class GPT2Model(GPT2PreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.h[layer].attn.prune_heads(heads)

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="gpt2",
@@ -723,7 +723,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
             "attention_mask": attention_mask,
         }

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="gpt2",
@@ -837,7 +837,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
             "use_cache": kwargs.get("use_cache"),
         }

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=GPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -987,7 +987,7 @@ class GPT2ForSequenceClassification(GPT2PreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="microsoft/dialogrpt",

@@ -23,7 +23,7 @@ from torch.nn import CrossEntropyLoss

 from .activations import ACT2FN
 from .configuration_layoutlm import LayoutLMConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, MaskedLMOutput, TokenClassifierOutput
 from .modeling_utils import (
     PreTrainedModel,
@@ -607,7 +607,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="layoutlm-base-uncased",
@@ -744,7 +744,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
     def get_output_embeddings(self):
         return self.cls.predictions.decoder

-    @add_start_docstrings_to_callable(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="layoutlm-base-uncased",
@@ -832,7 +832,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
     def get_input_embeddings(self):
         return self.layoutlm.embeddings.word_embeddings

-    @add_start_docstrings_to_callable(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="layoutlm-base-uncased",

@@ -27,7 +27,7 @@ from .configuration_longformer import LongformerConfig
 from .file_utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -1181,7 +1181,7 @@ class LongformerModel(LongformerPreTrainedModel):
         attention_mask = global_attention_mask + 1
         return attention_mask

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1308,7 +1308,7 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1412,7 +1412,7 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="allenai/longformer-base-4096",
@@ -1521,7 +1521,7 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1655,7 +1655,7 @@ class LongformerForTokenClassification(LongformerPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="allenai/longformer-base-4096",
@@ -1742,7 +1742,9 @@ class LongformerForMultipleChoice(LongformerPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        LONGFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
+    )
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="allenai/longformer-base-4096",

@@ -30,7 +30,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_utils import PreTrainedModel
@@ -893,7 +893,7 @@ class LxmertModel(LxmertPreTrainedModel):
     def set_input_embeddings(self, new_embeddings):
         self.embeddings.word_embeddings = new_embeddings

-    @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="unc-nlp/lxmert-base-uncased",
@@ -1145,7 +1145,7 @@ class LxmertForPreTraining(LxmertPreTrainedModel):

         return new_qa_logit_layer

-    @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=LxmertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1368,7 +1368,7 @@ class LxmertForQuestionAnswering(LxmertPreTrainedModel):

         return new_qa_logit_layer

-    @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="unc-nlp/lxmert-base-uncased",

@@ -20,7 +20,7 @@ import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss, MSELoss

-from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings
 from .modeling_outputs import BaseModelOutputWithPooling, SequenceClassifierOutput
 from .modeling_utils import ModuleUtilsMixin
 from .utils import logging
@@ -187,7 +187,7 @@ class MMBTModel(nn.Module, ModuleUtilsMixin):
         self.transformer = transformer
         self.modal_encoder = ModalEmbeddings(config, encoder, transformer.embeddings)

-    @add_start_docstrings_to_callable(MMBT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(MMBT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,

@@ -37,7 +37,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -837,7 +837,7 @@ class MobileBertModel(MobileBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -970,7 +970,7 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
         if output_embeddings is not None and self.config.tie_word_embeddings:
             self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings())

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=MobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1088,7 +1088,7 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
         if output_embeddings is not None and self.config.tie_word_embeddings:
             self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings())

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1184,7 +1184,7 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=NextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1276,7 +1276,7 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1361,7 +1361,7 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1460,7 +1460,9 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
+    )
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1555,7 +1557,7 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",

@@ -33,7 +33,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
@@ -427,7 +427,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.h[layer].attn.prune_heads(heads)

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="openai-gpt",
@@ -543,7 +543,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="openai-gpt",
@@ -629,7 +629,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=OpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -754,7 +754,7 @@ class OpenAIGPTForSequenceClassification(OpenAIGPTPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="openai-gpt",

@@ -25,7 +25,12 @@ from torch import Tensor, nn

 from .activations import ACT2FN
 from .configuration_prophetnet import ProphetNetConfig
-from .file_utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import (
+    ModelOutput,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    replace_return_docstrings,
+)
 from .modeling_outputs import BaseModelOutput
 from .modeling_utils import PreTrainedModel
 from .utils import logging
@@ -1138,7 +1143,7 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
     def set_input_embeddings(self, value):
         self.word_embeddings = value

-    @add_start_docstrings_to_callable(PROPHETNET_STANDALONE_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(PROPHETNET_STANDALONE_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1254,7 +1259,7 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
     def set_input_embeddings(self, value):
         self.word_embeddings = value

-    @add_start_docstrings_to_callable(PROPHETNET_STANDALONE_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(PROPHETNET_STANDALONE_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=ProphetNetDecoderModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1570,7 +1575,7 @@ class ProphetNetModel(ProphetNetPreTrainedModel):
     def get_decoder(self):
         return self.decoder

-    @add_start_docstrings_to_callable(PROPHETNET_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(PROPHETNET_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=ProphetNetSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1674,7 +1679,7 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel):
     def get_input_embeddings(self):
         return self.prophetnet.word_embeddings

-    @add_start_docstrings_to_callable(PROPHETNET_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(PROPHETNET_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=ProphetNetSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1865,7 +1870,7 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head

-    @add_start_docstrings_to_callable(PROPHETNET_STANDALONE_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(PROPHETNET_STANDALONE_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=ProphetNetDecoderLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,

@@ -21,7 +21,7 @@ import torch

 from .configuration_rag import RagConfig
 from .configuration_utils import PretrainedConfig
-from .file_utils import add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import add_start_docstrings_to_model_forward, replace_return_docstrings
 from .modeling_outputs import ModelOutput
 from .modeling_utils import PreTrainedModel
 from .retrieval_rag import RagRetriever
@@ -459,7 +459,7 @@ RAG_FORWARD_INPUTS_DOCSTRING = r"""
 """


-@add_start_docstrings_to_callable(RAG_START_DOCSTRING)
+@add_start_docstrings_to_model_forward(RAG_START_DOCSTRING)
 class RagModel(RagPreTrainedModel):
     def __init__(
         self,
@@ -502,7 +502,7 @@ class RagModel(RagPreTrainedModel):
         self.question_encoder = question_encoder
         self.generator = generator

-    @add_start_docstrings_to_callable(RAG_FORWARD_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=RetrievAugLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -658,7 +658,7 @@ class RagModel(RagPreTrainedModel):
         )


-@add_start_docstrings_to_callable(
+@add_start_docstrings_to_model_forward(
     """
     A RAG-sequence model implementation. It performs RAG-sequence specific marginalization in the forward pass.
     """,
@@ -687,7 +687,7 @@ class RagSequenceForGeneration(RagPreTrainedModel):
     def set_retriever(self, retriever: RagRetriever):
         self.rag.retriever = retriever

-    @add_start_docstrings_to_callable(RAG_FORWARD_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=RetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -984,7 +984,7 @@ class RagSequenceForGeneration(RagPreTrainedModel):
         return output


-@add_start_docstrings_to_callable(
+@add_start_docstrings_to_model_forward(
     """
     A RAG-token model implementation. It performs RAG-token specific marginalization in the forward pass.
     """,
@@ -1080,7 +1080,7 @@ class RagTokenForGeneration(RagPreTrainedModel):
         log_prob_sum = seq_logprobs + doc_logprobs.unsqueeze(-1).unsqueeze(-1)
         return torch.logsumexp(log_prob_sum, dim=1)

-    @add_start_docstrings_to_callable(RAG_FORWARD_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=RetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
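The RAG context lines above (log_prob_sum = seq_logprobs + doc_logprobs.unsqueeze(-1).unsqueeze(-1) followed by torch.logsumexp(log_prob_sum, dim=1)) marginalize the generator's token probabilities over the retrieved documents in log space. A self-contained check of that identity with toy shapes — the real RAG tensors are shaped differently:

    import torch

    # log p(y|x) = log sum_d p(d|x) * p(y|x,d), computed stably in log space.
    n_docs, seq_len, vocab = 2, 3, 5
    seq_logprobs = torch.log_softmax(torch.randn(1, n_docs, seq_len, vocab), dim=-1)
    doc_logprobs = torch.log_softmax(torch.randn(1, n_docs), dim=-1)

    log_prob_sum = seq_logprobs + doc_logprobs.unsqueeze(-1).unsqueeze(-1)
    marginalized = torch.logsumexp(log_prob_sum, dim=1)  # sum over the n_docs axis

    # The same sum done in probability space, for comparison:
    expected = (seq_logprobs.exp() * doc_logprobs.exp().unsqueeze(-1).unsqueeze(-1)).sum(dim=1).log()
    assert torch.allclose(marginalized, expected, atol=1e-6)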
@@ -36,7 +36,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
 )
 from .modeling_outputs import CausalLMOutput, MaskedLMOutput, QuestionAnsweringModelOutput, SequenceClassifierOutput
 from .modeling_utils import PreTrainedModel, apply_chunking_to_forward
@@ -1991,7 +1991,7 @@ class ReformerModel(ReformerPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/reformer-crime-and-punishment",
@@ -2195,7 +2195,7 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/reformer-crime-and-punishment",
@@ -2309,7 +2309,7 @@ class ReformerForMaskedLM(ReformerPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/reformer-crime-and-punishment",
@@ -2389,7 +2389,7 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/reformer-crime-and-punishment",
@@ -2491,7 +2491,7 @@ class ReformerForQuestionAnswering(ReformerPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/reformer-crime-and-punishment",
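The _prune_heads context shown in the Reformer hunk is the standard dispatch loop every encoder implements; callers reach it through PreTrainedModel.prune_heads with a {layer_index: [head_indices]} mapping. A usage sketch (BERT used for illustration; downloading the checkpoint is assumed):

    from transformers import BertModel

    model = BertModel.from_pretrained("bert-base-uncased")
    # Remove heads 0 and 2 in layer 1 and head 5 in layer 3; this dispatches to
    # each layer's attention.prune_heads, the loop shown in the context above.
    model.prune_heads({1: [0, 2], 3: [5]})
    print(model.encoder.layer[1].attention.self.num_attention_heads)  # 12 -> 10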
@@ -27,7 +27,7 @@ from .configuration_roberta import RobertaConfig
 from .file_utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import (
@@ -595,7 +595,7 @@ class RobertaModel(RobertaPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -718,7 +718,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -838,7 +838,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -956,7 +956,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -1039,7 +1039,7 @@ class RobertaForMultipleChoice(RobertaPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -1133,7 +1133,7 @@ class RobertaForTokenClassification(RobertaPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -1242,7 +1242,7 @@ class RobertaForQuestionAnswering(RobertaPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
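The visible effect of these decorator stacks is the documentation attached to the bound method at runtime; a quick way to confirm the rename changed nothing observable (checkpoint download assumed):

    from transformers import RobertaModel

    model = RobertaModel.from_pretrained("roberta-base")  # checkpoint named in the hunks above
    # The decorated forward carries the shared inputs docs plus the code sample.
    print(model.forward.__doc__.splitlines()[0])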
@@ -23,7 +23,7 @@ from torch.nn import CrossEntropyLoss, MSELoss

 from .activations import ACT2FN
 from .configuration_squeezebert import SqueezeBertConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_outputs import (
     BaseModelOutput,
     BaseModelOutputWithPooling,
@@ -518,7 +518,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="squeezebert/squeezebert-mnli-headless",
@@ -605,7 +605,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel):
     def get_output_embeddings(self):
         return self.lm_head

-    @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="squeezebert/squeezebert-uncased",
@@ -683,7 +683,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="squeezebert/squeezebert-mnli-headless",
@@ -767,7 +767,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(
+    @add_start_docstrings_to_model_forward(
         SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)")
     )
     @add_code_sample_docstrings(
@@ -861,7 +861,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="squeezebert/squeezebert-mnli-headless",
@@ -948,7 +948,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel):

         self.init_weights()

-    @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="squeezebert/squeezebert-mnli-headless",
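@add_code_sample_docstrings(tokenizer_class=..., checkpoint=...), the third decorator in most of these stacks, is untouched by the commit. A hedged sketch of its mechanism — the real version renders a full doctest-style block, while this stand-in (with a hypothetical MyModel) only formats the two arguments in:

    def add_code_sample_docstrings(*, tokenizer_class, checkpoint, **kwargs):
        def docstring_decorator(fn):
            example = (
                "\nExample::\n\n"
                f"    >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')\n"
                f"    >>> model = MyModel.from_pretrained('{checkpoint}')  # hypothetical class\n"
            )
            fn.__doc__ = (fn.__doc__ or "") + example
            return fn
        return docstring_decorator

    @add_code_sample_docstrings(tokenizer_class="T5Tokenizer", checkpoint="t5-small")
    def forward():
        """Toy forward."""

    print(forward.__doc__)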
@@ -30,7 +30,7 @@ from .file_utils import (
     DUMMY_INPUTS,
     DUMMY_MASK,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPast, Seq2SeqLMOutput, Seq2SeqModelOutput
@@ -943,7 +943,7 @@ class T5Model(T5PreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)

-    @add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -1086,7 +1086,7 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
     def get_decoder(self):
         return self.decoder

-    @add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -28,7 +28,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import (
@@ -747,7 +747,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.albert = TFAlbertMainLayer(config, name="albert")

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -778,7 +778,7 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
     def get_output_embeddings(self):
         return self.albert.embeddings

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(self, inputs, **kwargs):
         r"""
@@ -847,7 +847,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
     def get_output_embeddings(self):
         return self.albert.embeddings

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -930,7 +930,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1018,7 +1018,7 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1104,7 +1104,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
@@ -1212,7 +1212,7 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="albert-base-v2",
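The TF multiple-choice heads above build dummy inputs from MULTIPLE_CHOICE_DUMMY_INPUTS so the network's variables exist before weights load. Assuming only its documented (batch_size, num_choices, sequence_length) shape, a stand-in with hypothetical values:

    import tensorflow as tf

    MULTIPLE_CHOICE_DUMMY_INPUTS = [[[0, 1, 0, 1], [1, 0, 0, 1]]] * 2  # hypothetical values
    dummy = {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
    print(dummy["input_ids"].shape)  # (2, 2, 4): batch, choices, tokens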
@@ -25,7 +25,7 @@ from tensorflow.keras.layers import Dense, LayerNormalization

 from .activations_tf import ACT2FN
 from .configuration_bart import BartConfig
-from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings
+from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings
 from .modeling_tf_outputs import TFBaseModelOutput, TFBaseModelOutputWithPast, TFSeq2SeqLMOutput, TFSeq2SeqModelOutput

 # Public API
@@ -827,7 +827,7 @@ class TFBartModel(TFPretrainedBartModel):
         causal_lm_mask = causal_attention_mask(tgt_len, tgt_len, mask_dtype)
         return decoder_input_ids, decoder_padding_mask, causal_lm_mask

-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
@@ -961,7 +961,7 @@ class TFBartForConditionalGeneration(TFPretrainedBartModel):
         self.model = TFBartModel(config, name="model")
         self.use_cache = config.use_cache

-    @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
@@ -28,7 +28,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import (
@@ -793,7 +793,7 @@ class TFBertModel(TFBertPreTrainedModel):

         self.bert = TFBertMainLayer(config, name="bert")

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-cased",
@@ -824,7 +824,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
     def get_output_embeddings(self):
         return self.bert.embeddings

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(self, inputs, **kwargs):
         r"""
@@ -881,7 +881,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def get_output_embeddings(self):
         return self.bert.embeddings

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-cased",
@@ -1043,7 +1043,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
         self.bert = TFBertMainLayer(config, name="bert")
         self.nsp = TFBertNSPHead(config, name="nsp___cls")

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def call(self, inputs, **kwargs):
         r"""
@@ -1098,7 +1098,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-cased",
@@ -1191,7 +1191,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-cased",
@@ -1315,7 +1315,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-cased",
@@ -1400,7 +1400,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

-    @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="bert-base-cased",
@@ -20,7 +20,7 @@ import numpy as np
 import tensorflow as tf

 from .configuration_ctrl import CTRLConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_tf_outputs import TFBaseModelOutputWithPast, TFCausalLMOutputWithPast
 from .modeling_tf_utils import (
     TFCausalLanguageModelingLoss,
@@ -547,7 +547,7 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.transformer = TFCTRLMainLayer(config, name="transformer")

-    @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="ctrl",
@@ -602,7 +602,7 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss):

         return {"inputs": inputs, "past": past, "use_cache": kwargs["use_cache"]}

-    @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="ctrl",
@@ -25,7 +25,7 @@ from .file_utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
 )
 from .modeling_tf_outputs import (
     TFBaseModelOutput,
@@ -579,7 +579,7 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.distilbert = TFDistilBertMainLayer(config, name="distilbert")  # Embeddings

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -630,7 +630,7 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel
     def get_output_embeddings(self):
         return self.vocab_projector.input_embeddings

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -718,7 +718,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
         )
         self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -800,7 +800,7 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -895,7 +895,9 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
+    )
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -1007,7 +1009,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
         assert config.num_labels == 2, f"Incorrect number of labels {config.num_labels} instead of 2"
         self.dropout = tf.keras.layers.Dropout(config.qa_dropout)

-    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="distilbert-base-uncased",
@@ -11,7 +11,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import (
@@ -719,7 +719,7 @@ class TFElectraModel(TFElectraPreTrainedModel):

         self.electra = TFElectraMainLayer(config, name="electra")

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -749,7 +749,7 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
         self.electra = TFElectraMainLayer(config, name="electra")
         self.discriminator_predictions = TFElectraDiscriminatorPredictions(config, name="discriminator_predictions")

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
@@ -858,7 +858,7 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos
     def get_output_embeddings(self):
         return self.generator_lm_head

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-generator",
@@ -971,7 +971,7 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla
         self.electra = TFElectraMainLayer(config, name="electra")
         self.classifier = TFElectraClassificationHead(config, name="classifier")

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1072,7 +1072,7 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss)
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1192,7 +1192,7 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -1275,7 +1275,7 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

-    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/electra-small-discriminator",
@@ -25,7 +25,12 @@ import tensorflow as tf
 from transformers.activations_tf import get_tf_activation

 from .configuration_flaubert import FlaubertConfig
-from .file_utils import ModelOutput, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import (
+    ModelOutput,
+    add_code_sample_docstrings,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+)
 from .modeling_tf_outputs import TFBaseModelOutput
 from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, get_initializer, keras_serializable, shape_list
 from .modeling_tf_xlm import (
@@ -217,7 +222,7 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.transformer = TFFlaubertMainLayer(config, name="transformer")

-    @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="jplu/tf-flaubert-small-cased",
@@ -721,7 +726,7 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel):
             langs = None
         return {"inputs": inputs, "langs": langs}

-    @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="jplu/tf-flaubert-small-cased",
@@ -27,7 +27,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import (
@@ -1148,7 +1148,7 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.funnel = TFFunnelBaseLayer(config, name="funnel")

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1168,7 +1168,7 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.funnel = TFFunnelMainLayer(config, name="funnel")

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1192,7 +1192,7 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
         self.funnel = TFFunnelMainLayer(config, name="funnel")
         self.discriminator_predictions = TFFunnelDiscriminatorPredictions(config, name="discriminator_predictions")

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFFunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
@@ -1259,7 +1259,7 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
         self.funnel = TFFunnelMainLayer(config, name="funnel")
         self.lm_head = TFFunnelMaskedLMHead(config, self.funnel.embeddings, name="lm_head")

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1335,7 +1335,7 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
         self.funnel = TFFunnelBaseLayer(config, name="funnel")
         self.classifier = TFFunnelClassificationHead(config, config.num_labels, name="classifier")

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1421,7 +1421,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small-base",
@@ -1534,7 +1534,7 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -1613,7 +1613,7 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

-    @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="funnel-transformer/small",
@@ -27,7 +27,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import TFBaseModelOutputWithPast, TFCausalLMOutputWithPast
@@ -557,7 +557,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.transformer = TFGPT2MainLayer(config, name="transformer")

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="gpt2",
@@ -591,7 +591,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):

         return {"inputs": inputs, "past": past, "use_cache": kwargs["use_cache"]}

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="gpt2",
@@ -687,7 +687,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
     def get_output_embeddings(self):
         return self.transformer.wte

-    @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFGPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
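get_output_embeddings returning self.transformer.wte above is the hook for weight tying: the LM head and the input embedding share one matrix. The TF models reuse the embedding directly for output logits; a PyTorch-flavoured sketch of the same idea:

    import torch.nn as nn

    vocab_size, hidden_size = 100, 16
    wte = nn.Embedding(vocab_size, hidden_size)
    lm_head = nn.Linear(hidden_size, vocab_size, bias=False)
    lm_head.weight = wte.weight  # tie: a single (vocab, hidden) parameter serves both roles
    assert lm_head.weight.data_ptr() == wte.weight.data_ptr()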
@@ -19,7 +19,7 @@ import tensorflow as tf
 from transformers.activations_tf import get_tf_activation

 from .configuration_longformer import LongformerConfig
-from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
+from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from .modeling_tf_outputs import (
     TFBaseModelOutput,
     TFBaseModelOutputWithPooling,
@@ -1624,7 +1624,7 @@ class TFLongformerModel(TFLongformerPreTrainedModel):

         self.longformer = TFLongformerMainLayer(config, name="longformer")

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     def call(self, inputs, **kwargs):
         outputs = self.longformer(inputs, **kwargs)

@@ -1648,7 +1648,7 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="allenai/longformer-base-4096",
@@ -1736,7 +1736,7 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn
             name="qa_outputs",
         )

-    @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="allenai/longformer-large-4096-finetuned-triviaqa",
@@ -28,7 +28,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list
@@ -970,7 +970,7 @@ class TFLxmertModel(TFLxmertPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.lxmert = TFLxmertMainLayer(config, name="lxmert")

-    @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="unc-nlp/lxmert-base-uncased",
@@ -1224,7 +1224,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
             **({"obj_labels": obj_labels} if self.config.task_obj_predict else {}),
         }

-    @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFLxmertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
@@ -28,7 +28,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import (
@@ -960,7 +960,7 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -989,7 +989,7 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel):
     def get_output_embeddings(self):
         return self.mobilebert.embeddings

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFMobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(self, inputs, **kwargs):
         r"""
@@ -1040,7 +1040,7 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
     def get_output_embeddings(self):
         return self.mobilebert.embeddings

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1126,7 +1126,7 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel):
         self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert")
         self.cls = TFMobileBertOnlyNSPHead(config, name="seq_relationship___cls")

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def call(self, inputs, **kwargs):
         r"""
@@ -1181,7 +1181,7 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1268,7 +1268,7 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1376,7 +1376,9 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(
+        MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
+    )
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -1499,7 +1501,7 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/mobilebert-uncased",
@@ -27,7 +27,7 @@ from .file_utils import (
     ModelOutput,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
     replace_return_docstrings,
 )
 from .modeling_tf_outputs import TFBaseModelOutput, TFCausalLMOutput
@@ -495,7 +495,7 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.transformer = TFOpenAIGPTMainLayer(config, name="transformer")

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="openai-gpt",
@@ -522,7 +522,7 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin
     def get_output_embeddings(self):
         return self.transformer.tokens_embed

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="openai-gpt",
@@ -612,7 +612,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
     def get_output_embeddings(self):
         return self.transformer.tokens_embed

-    @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
+    @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
@@ -24,7 +24,7 @@ from .file_utils import (
     MULTIPLE_CHOICE_DUMMY_INPUTS,
     add_code_sample_docstrings,
     add_start_docstrings,
-    add_start_docstrings_to_callable,
+    add_start_docstrings_to_model_forward,
 )
 from .modeling_tf_outputs import (
     TFBaseModelOutput,
@@ -717,7 +717,7 @@ class TFRobertaModel(TFRobertaPreTrainedModel):
         super().__init__(config, *inputs, **kwargs)
         self.roberta = TFRobertaMainLayer(config, name="roberta")

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -776,7 +776,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos
     def get_output_embeddings(self):
         return self.lm_head.decoder

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -886,7 +886,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
         self.roberta = TFRobertaMainLayer(config, name="roberta")
         self.classifier = TFRobertaClassificationHead(config, name="classifier")

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -978,7 +978,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
         """
         return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -1096,7 +1096,7 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",
@@ -1182,7 +1182,7 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )

-    @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="roberta-base",

@ -31,7 +31,7 @@ from .file_utils import (
DUMMY_INPUTS,
DUMMY_MASK,
add_start_docstrings,
add_start_docstrings_to_callable,
add_start_docstrings_to_model_forward,
replace_return_docstrings,
)
from .modeling_tf_outputs import TFSeq2SeqLMOutput, TFSeq2SeqModelOutput
@ -980,7 +980,7 @@ class TFT5Model(TFT5PreTrainedModel):
def get_decoder(self):
return self.decoder

@add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING)
@add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,
@ -1177,7 +1177,7 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
def get_decoder(self):
return self.decoder

@add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING)
@add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,

@ -23,7 +23,12 @@ from typing import List, Optional, Tuple
import tensorflow as tf

from .configuration_transfo_xl import TransfoXLConfig
from .file_utils import ModelOutput, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_model_forward,
)
from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list
from .tokenization_utils import BatchEncoding
@ -803,7 +808,7 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
super().__init__(config, *inputs, **kwargs)
self.transformer = TFTransfoXLMainLayer(config, name="transformer")

@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
@add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="transfo-xl-wt103",
@ -873,7 +878,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
def init_mems(self, bsz):
return self.transformer.init_mems(bsz)

@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
@add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="transfo-xl-wt103",

@ -32,7 +32,7 @@ from .file_utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_callable,
add_start_docstrings_to_model_forward,
)
from .modeling_tf_outputs import (
TFBaseModelOutput,
@ -696,7 +696,7 @@ class TFXLMModel(TFXLMPreTrainedModel):
super().__init__(config, *inputs, **kwargs)
self.transformer = TFXLMMainLayer(config, name="transformer")

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -775,7 +775,7 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
langs = None
return {"inputs": inputs, "langs": langs}

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -813,7 +813,7 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat
self.transformer = TFXLMMainLayer(config, name="transformer")
self.sequence_summary = TFSequenceSummary(config, initializer_range=config.init_std, name="sequence_summary")

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -914,7 +914,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
"langs": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS),
}

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -1056,7 +1056,7 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
config.num_labels, kernel_initializer=get_initializer(config.init_std), name="classifier"
)

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -1143,7 +1143,7 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL
config.num_labels, kernel_initializer=get_initializer(config.init_std), name="qa_outputs"
)

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",

@ -30,7 +30,7 @@ from .file_utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_callable,
add_start_docstrings_to_model_forward,
replace_return_docstrings,
)
from .modeling_tf_utils import (
@ -1130,7 +1130,7 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
super().__init__(config, *inputs, **kwargs)
self.transformer = TFXLNetMainLayer(config, name="transformer")

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1197,7 +1197,7 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):

return inputs

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=TFXLNetLMHeadModelOutput, config_class=_CONFIG_FOR_DOC)
def call(
self,
@ -1314,7 +1314,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="logits_proj"
)

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1417,7 +1417,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
"""
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1552,7 +1552,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1639,7 +1639,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
)

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",

@ -26,7 +26,12 @@ import torch.nn as nn
import torch.nn.functional as F

from .configuration_transfo_xl import TransfoXLConfig
from .file_utils import ModelOutput, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_model_forward,
)
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax
from .modeling_utils import PreTrainedModel
from .utils import logging
@ -830,7 +835,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):

return new_mems

@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
@add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="transfo-xl-wt103",
@ -1018,7 +1023,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
def init_mems(self, bsz):
return self.transformer.init_mems(bsz)

@add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING)
@add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="transfo-xl-wt103",

@ -35,7 +35,7 @@ from .file_utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_callable,
add_start_docstrings_to_model_forward,
replace_return_docstrings,
)
from .modeling_outputs import (
@ -486,7 +486,7 @@ class XLMModel(XLMPreTrainedModel):
for layer, heads in heads_to_prune.items():
self.attentions[layer].prune_heads(heads)

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -703,7 +703,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
langs = None
return {"input_ids": input_ids, "langs": langs}

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -781,7 +781,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -868,7 +868,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -972,7 +972,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=XLMForQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC)
def forward(
self,
@ -1091,7 +1091,7 @@ class XLMForTokenClassification(XLMPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",
@ -1184,7 +1184,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length"))
@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlm-mlm-en-2048",

@ -32,7 +32,7 @@ from .file_utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_callable,
add_start_docstrings_to_model_forward,
replace_return_docstrings,
)
from .modeling_utils import (
@ -1064,7 +1064,7 @@ class XLNetModel(XLNetPreTrainedModel):
pos_emb = pos_emb.to(self.device)
return pos_emb

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1342,7 +1342,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):

return inputs

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=XLNetLMHeadModelOutput, config_class=_CONFIG_FOR_DOC)
def forward(
self,
@ -1465,7 +1465,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1558,7 +1558,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1655,7 +1655,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1756,7 +1756,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xlnet-base-cased",
@ -1868,7 +1868,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=XLNetForQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC)
def forward(
self,

@ -26,7 +26,7 @@ from .file_utils import (
MULTIPLE_CHOICE_DUMMY_INPUTS,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_callable,
add_start_docstrings_to_model_forward,
)
from .modeling_tf_outputs import (
TFBaseModelOutputWithPooling,
@ -360,7 +360,7 @@ class TFXxxModel(TFXxxPreTrainedModel):
super().__init__(config, *inputs, **kwargs)
self.transformer = TFXxxMainLayer(config, name="transformer")

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-cased",
@ -383,7 +383,7 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss):
self.transformer = TFXxxMainLayer(config, name="transformer")
self.mlm = TFXxxMLMHead(config, self.transformer.embeddings, name="mlm")

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-cased",
@ -465,7 +465,7 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-cased",
@ -557,7 +557,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
"""
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-cased",
@ -680,7 +680,7 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-cased",
@ -761,7 +761,7 @@ class TFXxxForQuestionAnswering(TFXxxPreTrainedModel, TFQuestionAnsweringLoss):
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
)

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-cased",

@ -26,7 +26,7 @@ from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss

from .configuration_xxx import XxxConfig
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
from .modeling_outputs import (
BaseModelOutputWithPooling,
MaskedLMOutput,
@ -309,7 +309,7 @@ class XxxModel(XxxPreTrainedModel):
for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads)

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-uncased",
@ -391,7 +391,7 @@ class XxxForMaskedLM(XxxPreTrainedModel):
def get_output_embeddings(self):
return self.lm_head

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-uncased",
@ -468,7 +468,7 @@ class XxxForSequenceClassification(XxxPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-uncased",
@ -551,7 +551,7 @@ class XxxForMultipleChoice(XxxPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-uncased",
@ -641,7 +641,7 @@ class XxxForTokenClassification(XxxPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-uncased",
@ -726,7 +726,7 @@ class XxxForQuestionAnswering(XxxPreTrainedModel):

self.init_weights()

@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="xxx-base-uncased",
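
Every hunk above is the same mechanical change: the @add_start_docstrings_to_callable(...) decorator on each model's forward/call method is renamed to @add_start_docstrings_to_model_forward(...), across the PyTorch and TensorFlow model files and the templates copies, with the from .file_utils import lists updated to match. The commit itself ships no compatibility shim; purely as a minimal sketch of how such a rename could keep the old name importable (the docstring-prepending body and the warnings-based alias below are assumptions for illustration, not code from this diff):

import warnings


def add_start_docstrings_to_model_forward(*docstr):
    # Prepend the given docstring fragments to the decorated forward/call method.
    def decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ or "")
        return fn

    return decorator


def add_start_docstrings_to_callable(*docstr):
    # Deprecated alias: warn once per call site, then delegate to the new name.
    warnings.warn(
        "add_start_docstrings_to_callable is deprecated; use "
        "add_start_docstrings_to_model_forward instead.",
        FutureWarning,
    )
    return add_start_docstrings_to_model_forward(*docstr)

With an alias like this in place, existing imports of the old name would keep working while emitting a FutureWarning until callers migrate.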