From 790f1c9545f4a83b97bf75640be82b2112c3efe7 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Tue, 3 Aug 2021 08:28:25 +0200
Subject: [PATCH] Fix template for inputs docstrings (#12976)

---
 .../models/big_bird/modeling_big_bird.py       | 18 +++++++++---------
 .../models/canine/modeling_canine.py           | 16 ++++++++--------
 .../models/convbert/modeling_convbert.py       | 14 +++++++-------
 .../models/deberta/modeling_deberta.py         | 10 +++++-----
 .../models/deberta_v2/modeling_deberta_v2.py   | 10 +++++-----
 src/transformers/models/deit/modeling_deit.py  |  6 +++---
 .../models/ibert/modeling_ibert.py             |  2 +-
 .../models/mpnet/modeling_mpnet.py             |  4 ++--
 .../models/rembert/modeling_rembert.py         | 18 +++++++++---------
 .../models/roberta/modeling_roberta.py         |  2 +-
 .../models/roformer/modeling_roformer.py       | 16 ++++++++--------
 .../models/squeezebert/modeling_squeezebert.py | 12 ++++++------
 src/transformers/models/vit/modeling_vit.py    |  4 ++--
 .../models/xlnet/modeling_xlnet.py             |  2 +-
 ...ing_{{cookiecutter.lowercase_modelname}}.py | 18 +++++++++---------
 15 files changed, 76 insertions(+), 76 deletions(-)

diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py
index 15aef2b789a..51cc371f6c2 100755
--- a/src/transformers/models/big_bird/modeling_big_bird.py
+++ b/src/transformers/models/big_bird/modeling_big_bird.py
@@ -1789,7 +1789,7 @@ BIG_BIRD_START_DOCSTRING = r"""
 
 BIG_BIRD_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.BigBirdTokenizer`. See
@@ -1797,14 +1797,14 @@ BIG_BIRD_INPUTS_DOCSTRING = r"""
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -1812,7 +1812,7 @@
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
@@ -1823,7 +1823,7 @@
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
@@ -1967,7 +1967,7 @@ class BigBirdModel(BigBirdPreTrainedModel):
         self.attention_type = value
         self.encoder.set_attention_type(value)
 
-    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -2374,7 +2374,7 @@ class BigBirdForMaskedLM(BigBirdPreTrainedModel):
     def set_output_embeddings(self, new_embeddings):
         self.cls.predictions.decoder = new_embeddings
 
-    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -2832,7 +2832,7 @@ class BigBirdForTokenClassification(BigBirdPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -2940,7 +2940,7 @@ class BigBirdForQuestionAnswering(BigBirdPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(BIG_BIRD_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="google/bigbird-base-trivia-itc",
diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py
index a8bd544a899..dd273463dea 100644
--- a/src/transformers/models/canine/modeling_canine.py
+++ b/src/transformers/models/canine/modeling_canine.py
@@ -928,7 +928,7 @@ CANINE_START_DOCSTRING = r"""
 
 CANINE_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.CanineTokenizer`. See
@@ -936,14 +936,14 @@ CANINE_INPUTS_DOCSTRING = r"""
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -951,7 +951,7 @@
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
@@ -962,7 +962,7 @@
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
@@ -1088,7 +1088,7 @@ class CanineModel(CaninePreTrainedModel):
         # `repeated`: [batch_size, char_seq_len, molecule_hidden_size]
         return torch.cat([repeated, remainder_repeated], dim=-2)
 
-    @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1458,7 +1458,7 @@ class CanineForTokenClassification(CaninePreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1545,7 +1545,7 @@ class CanineForQuestionAnswering(CaninePreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(CANINE_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py
index 3625f65ff66..43b54275521 100755
--- a/src/transformers/models/convbert/modeling_convbert.py
+++ b/src/transformers/models/convbert/modeling_convbert.py
@@ -689,7 +689,7 @@ CONVBERT_START_DOCSTRING = r"""
 
 CONVBERT_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.ConvBertTokenizer`. See
@@ -697,7 +697,7 @@
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
@@ -705,7 +705,7 @@
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -714,7 +714,7 @@
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
@@ -726,7 +726,7 @@
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
@@ -1163,7 +1163,7 @@ class ConvBertForTokenClassification(ConvBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1250,7 +1250,7 @@ class ConvBertForQuestionAnswering(ConvBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py
index b1aaee0c26e..f7708eba486 100644
--- a/src/transformers/models/deberta/modeling_deberta.py
+++ b/src/transformers/models/deberta/modeling_deberta.py
@@ -794,7 +794,7 @@ DEBERTA_START_DOCSTRING = r"""
 
 DEBERTA_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.DebertaTokenizer`. See
@@ -802,14 +802,14 @@ DEBERTA_INPUTS_DOCSTRING = r"""
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -817,12 +817,12 @@
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
 
             `What are position IDs? <../glossary.html#position-ids>`_
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py
index eddd06129c8..f1f975fc8af 100644
--- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py
+++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py
@@ -915,7 +915,7 @@ DEBERTA_START_DOCSTRING = r"""
 
 DEBERTA_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.DebertaV2Tokenizer`. See
@@ -923,14 +923,14 @@ DEBERTA_INPUTS_DOCSTRING = r"""
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -938,12 +938,12 @@
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
 
             `What are position IDs? <../glossary.html#position-ids>`_
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py
index 0a6a4df8403..a4155964711 100644
--- a/src/transformers/models/deit/modeling_deit.py
+++ b/src/transformers/models/deit/modeling_deit.py
@@ -466,7 +466,7 @@ class DeiTModel(DeiTPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -570,7 +570,7 @@ class DeiTForImageClassification(DeiTPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -707,7 +707,7 @@ class DeiTForImageClassificationWithTeacher(DeiTPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(DEIT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=DeiTForImageClassificationWithTeacherOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
diff --git a/src/transformers/models/ibert/modeling_ibert.py b/src/transformers/models/ibert/modeling_ibert.py
index 48344f0a254..01f6c2d014a 100644
--- a/src/transformers/models/ibert/modeling_ibert.py
+++ b/src/transformers/models/ibert/modeling_ibert.py
@@ -774,7 +774,7 @@ class IBertModel(IBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/mpnet/modeling_mpnet.py b/src/transformers/models/mpnet/modeling_mpnet.py
index 513910b893b..16540670b73 100644
--- a/src/transformers/models/mpnet/modeling_mpnet.py
+++ b/src/transformers/models/mpnet/modeling_mpnet.py
@@ -509,7 +509,7 @@ class MPNetModel(MPNetPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -867,7 +867,7 @@ class MPNetForTokenClassification(MPNetPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py
index 37b861c8945..3582df8a22f 100755
--- a/src/transformers/models/rembert/modeling_rembert.py
+++ b/src/transformers/models/rembert/modeling_rembert.py
@@ -680,7 +680,7 @@ REMBERT_START_DOCSTRING = r"""
 
 REMBERT_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.RemBertTokenizer`. See
@@ -688,14 +688,14 @@ REMBERT_INPUTS_DOCSTRING = r"""
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -703,7 +703,7 @@
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
@@ -714,7 +714,7 @@
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
@@ -772,7 +772,7 @@ class RemBertModel(RemBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="rembert",
@@ -925,7 +925,7 @@ class RemBertForMaskedLM(RemBertPreTrainedModel):
     def set_output_embeddings(self, new_embeddings):
         self.cls.predictions.decoder = new_embeddings
 
-    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="rembert",
@@ -1343,7 +1343,7 @@ class RemBertForTokenClassification(RemBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="rembert",
@@ -1431,7 +1431,7 @@ class RemBertForQuestionAnswering(RemBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint="rembert",
diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py
index fc377e8e40c..8bce52bb403 100644
--- a/src/transformers/models/roberta/modeling_roberta.py
+++ b/src/transformers/models/roberta/modeling_roberta.py
@@ -730,7 +730,7 @@ class RobertaModel(RobertaPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/roformer/modeling_roformer.py b/src/transformers/models/roformer/modeling_roformer.py
index 0e943617b7c..67c602f822e 100644
--- a/src/transformers/models/roformer/modeling_roformer.py
+++ b/src/transformers/models/roformer/modeling_roformer.py
@@ -744,7 +744,7 @@ ROFORMER_START_DOCSTRING = r"""
 
 ROFORMER_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.RoFormerTokenizer`. See
@@ -752,14 +752,14 @@ ROFORMER_INPUTS_DOCSTRING = r"""
             details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -773,7 +773,7 @@
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
@@ -832,7 +832,7 @@ class RoFormerModel(RoFormerPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -981,7 +981,7 @@ class RoFormerForMaskedLM(RoFormerPreTrainedModel):
     def set_output_embeddings(self, new_embeddings):
         self.cls.predictions.decoder = new_embeddings
 
-    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1412,7 +1412,7 @@ class RoFormerForTokenClassification(RoFormerPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1499,7 +1499,7 @@ class RoFormerForQuestionAnswering(RoFormerPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(ROFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py
index 38c54bf9d95..32691d9be3e 100644
--- a/src/transformers/models/squeezebert/modeling_squeezebert.py
+++ b/src/transformers/models/squeezebert/modeling_squeezebert.py
@@ -569,7 +569,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -662,7 +662,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel):
     def set_output_embeddings(self, new_embeddings):
         self.cls.predictions.decoder = new_embeddings
 
-    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -741,7 +741,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -839,7 +839,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel):
         self.init_weights()
 
     @add_start_docstrings_to_model_forward(
-        SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)")
+        SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
     )
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
@@ -932,7 +932,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1019,7 +1019,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py
index 893a733f3c2..308755234e0 100644
--- a/src/transformers/models/vit/modeling_vit.py
+++ b/src/transformers/models/vit/modeling_vit.py
@@ -452,7 +452,7 @@ class ViTModel(ViTPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
@@ -555,7 +555,7 @@ class ViTForImageClassification(ViTPreTrainedModel):
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
+    @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py
index cd0cb73ac18..b45f158d6a6 100755
--- a/src/transformers/models/xlnet/modeling_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_xlnet.py
@@ -861,7 +861,7 @@ XLNET_START_DOCSTRING = r"""
 
 XLNET_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.XLNetTokenizer`. See
diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
index 2b7bab9d689..d19d225c1c1 100755
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
@@ -696,7 +696,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
 
 {{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING = r"""
     Args:
-        input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
+        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
             Indices of input sequence tokens in the vocabulary.
 
             Indices can be obtained using :class:`transformers.{{cookiecutter.camelcase_modelname}}Tokenizer`.
@@ -704,14 +704,14 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
             :func:`transformers.PreTrainedTokenizer.__call__` for details.
 
             `What are input IDs? <../glossary.html#input-ids>`__
-        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
+        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
 
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
-        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
             1]``:
@@ -719,7 +719,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
             - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
-        position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
+        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0,
             config.max_position_embeddings - 1]``.
@@ -730,7 +730,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
 
-        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
+        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
             Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
@@ -788,7 +788,7 @@ class {{cookiecutter.camelcase_modelname}}Model({{cookiecutter.camelcase_modelna
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -947,7 +947,7 @@ class {{cookiecutter.camelcase_modelname}}ForMaskedLM({{cookiecutter.camelcase_m
     def set_output_embeddings(self, new_embeddings):
         self.cls.predictions.decoder = new_embeddings
 
-    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1385,7 +1385,7 @@ class {{cookiecutter.camelcase_modelname}}ForTokenClassification({{cookiecutter.
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
@@ -1472,7 +1472,7 @@ class {{cookiecutter.camelcase_modelname}}ForQuestionAnswering({{cookiecutter.ca
 
         self.init_weights()
 
-    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
+    @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         tokenizer_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
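
Why the parentheses moved: previously each call site interpolated a fully parenthesized shape, e.g. INPUTS_DOCSTRING.format("(batch_size, sequence_length)"), into the placeholder :obj:`{0}`. That forced the inputs_embeds entry to hardcode its shape as (batch_size, sequence_length, hidden_size), which renders incorrectly wherever the inputs carry extra leading dimensions, such as SqueezeBertForMultipleChoice's (batch_size, num_choices, sequence_length). With the parentheses inside the template, :obj:`({0})`, call sites pass only the bare dimension list and the inputs_embeds entry becomes :obj:`({0}, hidden_size)`, so the hidden_size axis composes with whatever shape the caller supplies. The DeiT and ViT decorators drop .format(...) altogether, as their docstrings take no shape placeholder. A minimal sketch of the convention follows; the template below is a simplified stand-in for the library's *_INPUTS_DOCSTRING constants, not the actual text:

# Simplified stand-in for a *_INPUTS_DOCSTRING constant (illustrative only);
# shows why the parentheses now live in the template rather than the caller.
INPUTS_DOCSTRING = r"""
    Args:
        input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
            Indices of input sequence tokens in the vocabulary.
        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
            Optionally, directly pass an embedded representation instead.
"""

# Call sites pass the bare dimension list; the extra hidden_size axis composes:
print(INPUTS_DOCSTRING.format("batch_size, sequence_length"))

# A multiple-choice call site reuses the same template, and inputs_embeds now
# renders as (batch_size, num_choices, sequence_length, hidden_size) -- which
# the old convention of formatting with "(batch_size, sequence_length)" could
# not express:
print(INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))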