Mirror of https://github.com/huggingface/transformers.git (synced 2025-08-03 03:31:05 +06:00)
add doctests for bart like seq2seq models (#15987)
* boom boom
* enable doctest for few seq2seq models
* add seq2seq models in documentation_tests.txt
* fix docstring blenderbot
* Apply suggestions from code review (Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>)
* Apply suggestions from code review (Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>)
* fix seq classif doc sample
* don't check loss for seq classif examples
* +IGNORE_OUTPUT => +IGNORE_RESULT
* fix _SEQ_CLASS_EXPECTED_OUTPUT_SHAPE
* fix some docs
* more fixes
* last fix (hopefully)
* fix big bird gen example
* fix mbart gen example

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
parent b256f3518d
commit a69e185074
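Several of the new doctests seed the RNG with `torch.manual_seed(0)`, which returns a `torch.Generator` whose repr would otherwise have to appear as expected output; the commit message's "+IGNORE_OUTPUT => +IGNORE_RESULT" refers to the directive that suppresses this check. `+IGNORE_RESULT` is not a built-in doctest flag — the test runner has to register it, roughly like this (a minimal sketch; transformers wires this up in its conftest.py):

```python
import doctest

# Register a custom option flag so examples can opt out of output checking
# with `# doctest: +IGNORE_RESULT`.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")


class CustomOutputChecker(doctest.OutputChecker):
    def check_output(self, want, got, optionflags):
        # When the flag is set, accept whatever the example produced.
        if IGNORE_RESULT & optionflags:
            return True
        return super().check_output(want, got, optionflags)


doctest.OutputChecker = CustomOutputChecker
```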
@@ -1012,6 +1012,8 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
     >>> from transformers import {processor_class}, {model_class}
     >>> import torch

+    >>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT
+
     >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")
@@ -1022,8 +1024,16 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""

     >>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
+    >>> loss = outputs.loss
+    >>> round(loss.item(), 2)
+    {expected_loss}
+
     >>> start_scores = outputs.start_logits
+    >>> list(start_scores.shape)
+    {expected_output}

     >>> end_scores = outputs.end_logits
+    >>> list(end_scores.shape)
+    {expected_output}
     ```
 """
@@ -1031,33 +1041,40 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
     Example of single-label classification:

     ```python
-    >>> from transformers import {processor_class}, {model_class}
     >>> import torch
+    >>> from transformers import {processor_class}, {model_class}
+
+    >>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT

     >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
-    >>> model = {model_class}.from_pretrained("{checkpoint}")
+    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=2)

     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
     >>> labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
     >>> outputs = model(**inputs, labels=labels)
     >>> loss = outputs.loss
     >>> logits = outputs.logits
+    >>> list(logits.shape)
+    {expected_output}
     ```

     Example of multi-label classification:

     ```python
-    >>> from transformers import {processor_class}, {model_class}
     >>> import torch
+    >>> from transformers import {processor_class}, {model_class}
+
+    >>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT

     >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
-    >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification")
+    >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification", num_labels=2)

     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
     >>> labels = torch.tensor([[1, 1]], dtype=torch.float)  # need dtype=float for BCEWithLogitsLoss
     >>> outputs = model(**inputs, labels=labels)
     >>> loss = outputs.loss
     >>> logits = outputs.logits
+    >>> list(logits.shape)
+    {expected_output}
     ```
 """
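The `{processor_class}`, `{model_class}`, `{checkpoint}`, `{expected_output}`, and `{expected_loss}` placeholders are filled per model by the `add_code_sample_docstrings` decorator, whose keyword arguments appear in the per-model hunks below. Roughly (a simplified sketch, not the library's exact implementation):

```python
# Simplified sketch of how a sample template becomes a model docstring.
def add_code_sample_docstrings(*, processor_class, checkpoint, output_type=None,
                               config_class=None, expected_output=None, expected_loss=None):
    def decorator(fn):
        model_class = fn.__qualname__.split(".")[0]
        sample = PT_QUESTION_ANSWERING_SAMPLE.format(
            processor_class=processor_class,
            model_class=model_class,
            checkpoint=checkpoint,
            expected_output=expected_output,
            expected_loss=expected_loss,
        )
        fn.__doc__ = (fn.__doc__ or "") + sample
        return fn
    return decorator
```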
src/transformers/models/bart/modeling_bart.py

@@ -48,14 +48,24 @@ from .configuration_bart import BartConfig

 logger = logging.get_logger(__name__)

-_CHECKPOINT_FOR_DOC = "facebook/bart-large"
+_CHECKPOINT_FOR_DOC = "facebook/bart-base"
 _CONFIG_FOR_DOC = "BartConfig"
 _TOKENIZER_FOR_DOC = "BartTokenizer"

+# Base model docstring
+_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]
+
+# SequenceClassification docstring
+_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
+
+# QuestionAsnwering docstring
+_QA_EXPECTED_LOSS = 2.98
+_QA_EXPECTED_OUTPUT_SHAPE = [1, 17]
+

 BART_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "facebook/bart-large",
-    # See all BART models at https://huggingface.co/models?filter=bart
+    # see all BART models at https://huggingface.co/models?filter=bart
 ]
@@ -542,12 +552,17 @@ BART_GENERATION_EXAMPLE = r"""
     >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
     >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

-    >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+    >>> ARTICLE_TO_SUMMARIZE = (
+    ...     "PG&E stated it scheduled the blackouts in response to forecasts for high winds "
+    ...     "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
+    ...     "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
+    ... )
     >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")

     >>> # Generate Summary
-    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
-    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=2, max_length=20)
+    >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    'PG&E scheduled the blackouts in response to forecasts for high winds amid dry conditions'
     ```

     Mask filling example:
@@ -555,10 +570,10 @@ BART_GENERATION_EXAMPLE = r"""
     ```python
     >>> from transformers import BartTokenizer, BartForConditionalGeneration

-    >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
-    >>> TXT = "My friends are <mask> but they eat too many carbs."
+    >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+    >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

-    >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
+    >>> TXT = "My friends are <mask> but they eat too many carbs."
     >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
     >>> logits = model(input_ids).logits
@@ -567,6 +582,7 @@ BART_GENERATION_EXAMPLE = r"""
     >>> values, predictions = probs.topk(5)

     >>> tokenizer.decode(predictions).split()
+    ['not', 'good', 'healthy', 'great', 'very']
     ```
 """
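Pieced together, the complete mask-filling recipe the doctest now exercises looks like this (the `masked_index` and `probs` lines come from the unchanged region between the two hunks above):

```python
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

TXT = "My friends are <mask> but they eat too many carbs."
input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
logits = model(input_ids).logits

# Locate the <mask> position and rank the vocabulary distribution there.
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
probs = logits[0, masked_index].softmax(dim=0)
values, predictions = probs.topk(5)
print(tokenizer.decode(predictions).split())  # e.g. ['not', 'good', 'healthy', 'great', 'very']
```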
@@ -641,11 +657,10 @@ BART_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
-            shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids`
-            you can choose to directly pass an embedded representation. This is useful if you want more control over
-            how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup
-            matrix.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
+            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
+            can choose to directly pass an embedded representation. This is useful if you want more control over how to
+            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
             Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
             representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -966,8 +981,8 @@ class BartDecoder(BartPretrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -1153,6 +1168,7 @@ class BartModel(BartPretrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
@@ -1434,6 +1450,7 @@ class BartForSequenceClassification(BartPretrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
@@ -1558,6 +1575,8 @@ class BartForQuestionAnswering(BartPretrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqQuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_loss=_QA_EXPECTED_LOSS,
+        expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
@@ -1789,13 +1808,16 @@ class BartForCausalLM(BartPretrainedModel):
         ```python
         >>> from transformers import BartTokenizer, BartForCausalLM

-        >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
-        >>> model = BartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False)
+        >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+        >>> model = BartForCausalLM.from_pretrained("facebook/bart-base", add_cross_attention=False)
         >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py

@@ -53,6 +53,16 @@ _CHECKPOINT_FOR_DOC = "google/bigbird-pegasus-large-arxiv"
 _CONFIG_FOR_DOC = "BigBirdPegasusConfig"
 _TOKENIZER_FOR_DOC = "PegasusTokenizer"

+# Base model docstring
+_EXPECTED_OUTPUT_SHAPE = [1, 7, 1024]
+
+# SequenceClassification docstring
+_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
+
+# QuestionAsnwering docstring
+_QA_EXPECTED_LOSS = 2.56
+_QA_EXPECTED_OUTPUT_SHAPE = [1, 12]
+

 BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "google/bigbird-pegasus-large-arxiv",
@@ -1627,12 +1637,20 @@ BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r"""
     >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv")
     >>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")

-    >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+    >>> ARTICLE_TO_SUMMARIZE = (
+    ...     "The dominant sequence transduction models are based on complex recurrent or convolutional neural "
+    ...     "networks in an encoder-decoder configuration. The best performing models also connect the encoder "
+    ...     "and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, "
+    ...     "based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. "
+    ...     "Experiments on two machine translation tasks show these models to be superior in quality "
+    ...     "while being more parallelizable and requiring significantly less time to train."
+    ... )
     >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True)

     >>> # Generate Summary
-    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
-    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=15)
+    >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    'dominant sequence models are based on recurrent or convolutional neural networks .'
     ```
 """
@@ -1684,11 +1702,10 @@ BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
-            shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids`
-            you can choose to directly pass an embedded representation. This is useful if you want more control over
-            how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup
-            matrix.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
+            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
+            can choose to directly pass an embedded representation. This is useful if you want more control over how to
+            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
             Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
             representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -2159,8 +2176,8 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -2346,6 +2363,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
@@ -2630,6 +2648,7 @@ class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
@@ -2755,6 +2774,8 @@ class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqQuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_loss=_QA_EXPECTED_LOSS,
+        expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
src/transformers/models/blenderbot/modeling_blenderbot.py

@@ -506,20 +506,37 @@ BLENDERBOT_START_DOCSTRING = r"""
 """

 BLENDERBOT_GENERATION_EXAMPLE = r"""
-    Conversation example::
+    Conversation example:

-        >>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration >>> mname =
-        'facebook/blenderbot-400M-distill' >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname) >>>
-        tokenizer = BlenderbotTokenizer.from_pretrained(mname) >>> UTTERANCE = "My friends are cool but they eat too
-        many carbs." >>> print("Human: ", UTTERANCE) >>> inputs = tokenizer([UTTERANCE], return_tensors='pt') >>>
-        reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(reply_ids,
-        skip_special_tokens=True)[0])
+    ```python
+    >>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

-        >>> REPLY = "I'm not sure" >>> print("Human: ", REPLY) >>> NEXT_UTTERANCE = ( ... "My friends are cool but they
-        eat too many carbs.</s> <s>That's unfortunate. " ... "Are they trying to lose weight or are they just trying to
-        be healthier?</s> " ... "<s> I'm not sure." ... ) >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors='pt')
-        >>> next_reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids,
-        skip_special_tokens=True)[0])
+    >>> mname = "facebook/blenderbot-400M-distill"
+    >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname)
+    >>> tokenizer = BlenderbotTokenizer.from_pretrained(mname)
+    >>> UTTERANCE = "My friends are cool but they eat too many carbs."
+    >>> print("Human: ", UTTERANCE)
+    Human: My friends are cool but they eat too many carbs.
+
+    >>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
+    >>> reply_ids = model.generate(**inputs)
+    >>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
+    Bot: That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?
+
+    >>> REPLY = "I'm not sure"
+    >>> print("Human: ", REPLY)
+    Human: I'm not sure
+
+    >>> NEXT_UTTERANCE = (
+    ...     "My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. "
+    ...     "Are they trying to lose weight or are they just trying to be healthier?</s> "
+    ...     "<s> I'm not sure."
+    ... )
+    >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
+    >>> next_reply_ids = model.generate(**inputs)
+    >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
+    Bot: That's too bad. Have you tried encouraging them to change their eating habits?
+    ```
 """

 BLENDERBOT_INPUTS_DOCSTRING = r"""
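The multi-turn prompt in `NEXT_UTTERANCE` is just the dialogue history joined with `</s> <s>` separators. A hypothetical helper (not part of the library) makes the convention explicit:

```python
def build_dialogue_history(utterances):
    # Hypothetical helper: join past turns with the </s> <s> separators the
    # Blenderbot examples above use to delimit utterances.
    return "</s> <s>".join(utterances)


history = build_dialogue_history(
    [
        "My friends are cool but they eat too many carbs.",
        "That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?",
        " I'm not sure.",
    ]
)
```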
@@ -586,11 +603,10 @@ BLENDERBOT_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
-            shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids`
-            you can choose to directly pass an embedded representation. This is useful if you want more control over
-            how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup
-            matrix.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
+            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
+            can choose to directly pass an embedded representation. This is useful if you want more control over how to
+            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
             Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
             representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -907,8 +923,8 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -1130,13 +1146,13 @@ class BlenderbotModel(BlenderbotPreTrainedModel):
         >>> model = BlenderbotModel.from_pretrained("facebook/blenderbot-400M-distill")
         >>> tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")

-        >>> input_ids = tokenizer(
-        ...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
-        >>> ).input_ids  # Batch size 1
+        >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
         >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
+        >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_input_ids)

         >>> last_hidden_states = outputs.last_hidden_state
+        >>> list(last_hidden_states.shape)
+        [1, 6, 1280]
         ```"""
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1389,7 +1405,7 @@ class BlenderbotDecoderWrapper(BlenderbotPreTrainedModel):
         return self.decoder(*args, **kwargs)


-# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-large->facebook/blenderbot-400M-distill
+# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-base->facebook/blenderbot-400M-distill
 class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
     def __init__(self, config):
         config = copy.deepcopy(config)
@@ -1520,6 +1536,9 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py

@@ -504,20 +504,37 @@ BLENDERBOT_SMALL_START_DOCSTRING = r"""
 """

 BLENDERBOT_SMALL_GENERATION_EXAMPLE = r"""
-    Conversation example::
+    Conversation example:

-        >>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration >>> mname =
-        'facebook/blenderbot_small-90M' >>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname) >>>
-        tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname) >>> UTTERANCE = "My friends are cool but they eat
-        too many carbs." >>> print("Human: ", UTTERANCE) >>> inputs = tokenizer([UTTERANCE], return_tensors='pt') >>>
-        reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(reply_ids,
-        skip_special_tokens=True)[0]) what kind of carbs do they eat? i don't know much about carbs.
+    ```python
+    >>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration

-        >>> REPLY = "I'm not sure" >>> print("Human: ", REPLY) >>> NEXT_UTTERANCE = ( ... "My friends are cool but they
-        eat too many carbs.</s> " ... "<s>what kind of carbs do they eat? i don't know much about carbs.</s> " ...
-        "<s>I'm not sure." ... ) >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors='pt') >>>
-        inputs.pop("token_type_ids") >>> next_reply_ids = model.generate(**inputs) >>> print("Bot: ",
-        tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
+    >>> mname = "facebook/blenderbot_small-90M"
+    >>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname)
+    >>> tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname)
+    >>> UTTERANCE = "My friends are cool but they eat too many carbs."
+    >>> print("Human: ", UTTERANCE)
+    Human: My friends are cool but they eat too many carbs.
+
+    >>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
+    >>> reply_ids = model.generate(**inputs)
+    >>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
+    Bot: what kind of carbs do they eat? i don't know much about carbs.
+
+    >>> REPLY = "I'm not sure"
+    >>> print("Human: ", REPLY)
+    Human: I'm not sure
+
+    >>> NEXT_UTTERANCE = (
+    ...     "My friends are cool but they eat too many carbs.</s> <s>what kind of carbs do they eat? "
+    ...     "i don't know much about carbs</s> "
+    ...     "<s> I'm not sure."
+    ... )
+    >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
+    >>> next_reply_ids = model.generate(**inputs)
+    >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
+    Bot: they eat a lot of carbs. carbs are high in fat, protein, and carbohydrates.
+    ```
 """

 BLENDERBOT_SMALL_INPUTS_DOCSTRING = r"""
@@ -584,11 +601,10 @@ BLENDERBOT_SMALL_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
-            shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids`
-            you can choose to directly pass an embedded representation. This is useful if you want more control over
-            how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup
-            matrix.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
+            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
+            can choose to directly pass an embedded representation. This is useful if you want more control over how to
+            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
             Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
             representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -902,8 +918,8 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -1113,13 +1129,13 @@ class BlenderbotSmallModel(BlenderbotSmallPreTrainedModel):
         >>> model = BlenderbotSmallModel.from_pretrained("facebook/blenderbot_small-90M")
         >>> tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M")

-        >>> input_ids = tokenizer(
-        ...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
-        >>> ).input_ids  # Batch size 1
-        >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
+        >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
+        >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt")  # Batch size 1
+        >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)

         >>> last_hidden_states = outputs.last_hidden_state
+        >>> list(last_hidden_states.shape)
+        [1, 3, 512]
         ```"""
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1360,7 +1376,7 @@ class BlenderbotSmallDecoderWrapper(BlenderbotSmallPreTrainedModel):
         return self.decoder(*args, **kwargs)


-# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-large->facebook/blenderbot_small-90M
+# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-base->facebook/blenderbot_small-90M
 class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
     def __init__(self, config):
         config = copy.deepcopy(config)
@@ -1491,6 +1507,9 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
src/transformers/models/marian/modeling_marian.py

@@ -523,27 +523,28 @@ MARIAN_START_DOCSTRING = r"""
 """

 MARIAN_GENERATION_EXAMPLE = r"""
-    Pytorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints.
-    Available models are listed [here](https://huggingface.co/models?search=Helsinki-NLP).
+    Pytorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints. Available
+    models are listed [here](https://huggingface.co/models?search=Helsinki-NLP).

     Examples:

     ```python
     >>> from transformers import MarianTokenizer, MarianMTModel
-    >>> from typing import List

     >>> src = "fr"  # source language
     >>> trg = "en"  # target language
-    >>> sample_text = "où est l'arrêt de bus ?"
-    >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"

+    >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
     >>> model = MarianMTModel.from_pretrained(model_name)
     >>> tokenizer = MarianTokenizer.from_pretrained(model_name)
-    >>> batch = tokenizer([sample_text], return_tensors="pt")
-    >>> gen = model.generate(**batch)
-    >>> tokenizer.batch_decode(gen, skip_special_tokens=True)
-    "Where is the bus stop ?"
-    ```
+
+    >>> sample_text = "où est l'arrêt de bus ?"
+    >>> batch = tokenizer([sample_text], return_tensors="pt")
+
+    >>> generated_ids = model.generate(**batch)
+    >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    "Where's the bus stop?"
+    ```
 """

 MARIAN_INPUTS_DOCSTRING = r"""
@@ -927,7 +928,7 @@ class MarianDecoder(MarianPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`.
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
             inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
                 Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
                 This is useful if you want more control over how to convert `input_ids` indices into associated vectors
@@ -1136,17 +1137,17 @@ class MarianModel(MarianPreTrainedModel):
         >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
         >>> model = MarianModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")

-        >>> input_ids = tokenizer(
-        ...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
-        >>> ).input_ids  # Batch size 1
-        >>> decoder_input_ids = tokenizer(
+        >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
+        >>> decoder_inputs = tokenizer(
         ...     "<pad> Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen",
         ...     return_tensors="pt",
         ...     add_special_tokens=False,
-        >>> ).input_ids  # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
+        ... )
+        >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)

         >>> last_hidden_states = outputs.last_hidden_state
+        >>> list(last_hidden_states.shape)
+        [1, 26, 512]
         ```"""
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1400,7 +1401,7 @@ class MarianDecoderWrapper(MarianPreTrainedModel):
         return self.decoder(*args, **kwargs)


-# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-large->Helsinki-NLP/opus-mt-fr-en
+# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-base->Helsinki-NLP/opus-mt-fr-en
 class MarianForCausalLM(MarianPreTrainedModel):
     def __init__(self, config):
         config = copy.deepcopy(config)
@@ -1529,6 +1530,9 @@ class MarianForCausalLM(MarianPreTrainedModel):
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
src/transformers/models/mbart/modeling_mbart.py

@@ -51,6 +51,16 @@ _CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25"
 _CONFIG_FOR_DOC = "MBartConfig"
 _TOKENIZER_FOR_DOC = "MBartTokenizer"

+# Base model docstring
+_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024]
+
+# SequenceClassification docstring
+_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
+
+# QuestionAsnwering docstring
+_QA_EXPECTED_LOSS = 3.04
+_QA_EXPECTED_OUTPUT_SHAPE = [1, 16]
+

 MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "facebook/mbart-large-cc25",
@@ -532,20 +542,21 @@ MBART_START_DOCSTRING = r"""
 """

 MBART_GENERATION_EXAMPLE = r"""
-    Summarization example:
+    Translation example:

     ```python
     >>> from transformers import MBartTokenizer, MBartForConditionalGeneration

-    >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
-    >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
+    >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
+    >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")

-    >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen."
-    >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")
+    >>> example_english_phrase = "42 is the answer"
+    >>> inputs = tokenizer(example_english_phrase, return_tensors="pt")

-    >>> # Generate Summary
-    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
-    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    >>> # Translate
+    >>> generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+    >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    '42 este răspuns'
     ```

     Mask filling example:
@@ -567,6 +578,7 @@ MBART_GENERATION_EXAMPLE = r"""
     >>> values, predictions = probs.topk(5)

     >>> tokenizer.decode(predictions).split()
+    ['nett', 'sehr', 'ganz', 'nicht', 'so']
     ```
 """
@@ -639,11 +651,10 @@ MBART_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
-            shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids`
-            you can choose to directly pass an embedded representation. This is useful if you want more control over
-            how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup
-            matrix.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
+            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
+            can choose to directly pass an embedded representation. This is useful if you want more control over how to
+            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
             Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
             representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -966,8 +977,8 @@ class MBartDecoder(MBartPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -1153,6 +1164,7 @@ class MBartModel(MBartPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_EXPECTED_OUTPUT_SHAPE,
     )
     def forward(
         self,
@@ -1428,6 +1440,7 @@ class MBartForSequenceClassification(MBartPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
     )
     # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
     def forward(
@@ -1553,6 +1566,8 @@ class MBartForQuestionAnswering(MBartPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqQuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_loss=_QA_EXPECTED_LOSS,
+        expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
     )
     # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward
     def forward(
@@ -1665,7 +1680,7 @@ class MBartDecoderWrapper(MBartPreTrainedModel):
         return self.decoder(*args, **kwargs)


-# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-large->facebook/mbart-large-cc25
+# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-base->facebook/mbart-large-cc25
 class MBartForCausalLM(MBartPreTrainedModel):
     def __init__(self, config):
         config = copy.deepcopy(config)
@@ -1794,6 +1809,9 @@ class MBartForCausalLM(MBartPreTrainedModel):
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
src/transformers/models/pegasus/modeling_pegasus.py

@@ -529,7 +529,8 @@ PEGASUS_GENERATION_EXAMPLE = r"""

     >>> # Generate Summary
     >>> summary_ids = model.generate(inputs["input_ids"])
-    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    "California's largest electricity provider has turned off power to hundreds of thousands of customers."
     ```
 """
@@ -597,11 +598,10 @@ PEGASUS_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
-            shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids`
-            you can choose to directly pass an embedded representation. This is useful if you want more control over
-            how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup
-            matrix.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
+            `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
+            can choose to directly pass an embedded representation. This is useful if you want more control over how to
+            convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
         decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
             Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
             representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -977,8 +977,8 @@ class PegasusDecoder(PegasusPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -1211,13 +1211,13 @@ class PegasusModel(PegasusPreTrainedModel):
         >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
         >>> model = PegasusModel.from_pretrained("google/pegasus-large")

-        >>> input_ids = tokenizer(
-        ...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
-        >>> ).input_ids  # Batch size 1
-        >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
+        >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
+        >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt")
+        >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)

         >>> last_hidden_states = outputs.last_hidden_state
+        >>> list(last_hidden_states.shape)
+        [1, 4, 1024]
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1540,7 +1540,7 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
         self.model.decoder.resize_position_embeddings(new_num_position_embeddings)

     @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
-    # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-large->google/pegasus-large
+    # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-base->google/pegasus-large
     def forward(
         self,
         input_ids=None,
@@ -1637,6 +1637,9 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
src/transformers/models/plbart/modeling_plbart.py

@@ -50,6 +50,12 @@ _CHECKPOINT_FOR_DOC = "uclanlp/plbart-base"
 _CONFIG_FOR_DOC = "PLBartConfig"
 _TOKENIZER_FOR_DOC = "PLBartTokenizer"

+# Base model docstring
+_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]
+
+# SequenceClassification docstring
+_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
+

 PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "uclanlp/plbart-base",
@@ -526,27 +532,26 @@ PLBART_START_DOCSTRING = r"""
 """

 PLBART_GENERATION_EXAMPLE = r"""
-    Token in-filling example:
-
-        >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration, PLBartConfig
-
-        >>> model = PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base') >>> tokenizer =
-        PLBartTokenizer.from_pretrained('uclanlp/plbart-base', src_lang='java', tgt_lang='java') >>> METHOD_TO_FILL =
-        "public static main (String args[0]) { data=Date(); System.out. String.format("Current Date : % tc", ));}" >>>
-        inputs = tokenizer([METHOD_TO_FILL], max_length=1024, return_tensors='pt') >>> # Generate Filled Code >>>
-        generated_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, early_stopping=True) >>>
-        print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in
-        generated_ids])
-
     Mask-filling example:

-        >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration >>> tokenizer =
-        PLBartTokenizer.from_pretrained('uclanlp/plbart-base') >>> # en_XX is the language symbol id <LID> for English
-        >>> TXT = "</s> Is 0 the <mask> Fibonacci <mask> ? </s> en_XX" >>> model =
-        PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base') >>> input_ids = tokenizer([TXT],
-        add_special_tokens=False, return_tensors='pt')['input_ids'] >>> logits = model(input_ids).logits >>>
-        masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0,
-        masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5) >>> tokenizer.decode(predictions).split()
+    ```python
+    >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration
+
+    >>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-base")
+    >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base")
+
+    >>> # en_XX is the language symbol id <LID> for English
+    >>> TXT = "<s> Is 0 the <mask> Fibonacci number ? </s> en_XX"
+    >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt").input_ids
+
+    >>> logits = model(input_ids).logits
+    >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
+    >>> probs = logits[0, masked_index].softmax(dim=0)
+    >>> values, predictions = probs.topk(5)
+
+    >>> tokenizer.decode(predictions).split()
+    ['same', 'first', 'highest', 'result', 'Fib']
+    ```
 """

 PLBART_INPUTS_DOCSTRING = r"""
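One caveat on this mask-filling pattern: `(input_ids[0] == tokenizer.mask_token_id).nonzero().item()` assumes exactly one `<mask>` in the prompt; `.item()` raises if there are several, as in the removed example's two-mask `TXT`. Continuing with the `input_ids` and `logits` from the example above, a sketch of the multi-mask variant:

```python
# Handle any number of <mask> positions instead of assuming exactly one.
masked_indices = (input_ids[0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
for idx in masked_indices.tolist():
    top5 = logits[0, idx].softmax(dim=0).topk(5).indices
    print(idx, tokenizer.decode(top5).split())
```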
@@ -619,7 +624,7 @@ PLBART_INPUTS_DOCSTRING = r"""

             If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
             don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
-            ``decoder_input_ids``` of shape `(batch_size, sequence_length)`.
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`.
         inputs_embeds (:
             obj:*torch.FloatTensor* of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally,
             instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful
@@ -948,8 +953,8 @@ class PLBartDecoder(PLBartPreTrainedModel):

                 If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
                 that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
-                all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor`
-                of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
+                all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
+                shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
                 `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
                 control over how to convert `input_ids` indices into associated vectors than the model's internal
                 embedding lookup matrix.
@@ -1406,6 +1411,7 @@ class PLBartForSequenceClassification(PLBartPreTrainedModel):
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Seq2SeqSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
     )
     # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
     def forward(
@@ -1521,7 +1527,7 @@ class PLBartDecoderWrapper(PLBartPreTrainedModel):
         return self.decoder(*args, **kwargs)


-# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart
+# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart, facebook/bart-base->uclanlp/plbart-base
 class PLBartForCausalLM(PLBartPreTrainedModel):
     def __init__(self, config):
         config = copy.deepcopy(config)
@@ -1643,13 +1649,16 @@ class PLBartForCausalLM(PLBartPreTrainedModel):
         ```python
         >>> from transformers import PLBartTokenizer, PLBartForCausalLM

-        >>> tokenizer = PLBartTokenizer.from_pretrained("facebook/bart-large")
-        >>> model = PLBartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False)
+        >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base")
+        >>> model = PLBartForCausalLM.from_pretrained("uclanlp/plbart-base", add_cross_attention=False)
         >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)

         >>> logits = outputs.logits
+        >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
+        >>> list(logits.shape) == expected_shape
+        True
         ```"""

         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
utils/documentation_tests.txt

@@ -20,5 +20,13 @@ src/transformers/models/poolformer/modeling_poolformer.py
 src/transformers/models/vit_mae/modeling_vit_mae.py
 src/transformers/models/segformer/modeling_segformer.py
 src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
+src/transformers/models/bart/modeling_bart.py
+src/transformers/models/mbart/modeling_mbart.py
+src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
+src/transformers/models/marian/modeling_marian.py
+src/transformers/models/pegasus/modeling_pegasus.py
+src/transformers/models/blenderbot/modeling_blenderbot.py
+src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
+src/transformers/models/plbart/modeling_plbart.py
 docs/source/quicktour.mdx
 docs/source/task_summary.mdx
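With the files registered, the doctests can be checked locally before CI runs them. A minimal sketch of one way to drive this, assuming pytest's standard doctest flags (`--doctest-modules`, `--doctest-continue-on-failure`):

```python
# Sketch: run the newly registered doctests file by file.
import subprocess

FILES = [
    "src/transformers/models/bart/modeling_bart.py",
    "src/transformers/models/mbart/modeling_mbart.py",
    # ... the other entries added to documentation_tests.txt
]

for path in FILES:
    subprocess.run(
        ["pytest", "--doctest-modules", path, "-sv", "--doctest-continue-on-failure"],
        check=True,
    )
```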