Fix EncoderDecoderModel docs (#14197)

* Fix docs

* Apply suggestions from review + fix bug
NielsRogge 2021-10-28 18:01:00 +02:00 committed by GitHub
parent ac12a5ae47
commit 5f3bf65111
3 changed files with 28 additions and 26 deletions

src/transformers/models/encoder_decoder/modeling_encoder_decoder.py

@@ -98,9 +98,9 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r"""
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
:obj:`past_key_values`).
- Provide for sequence to sequence training to the decoder. Indices can be obtained using
- :class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and
- :meth:`transformers.PreTrainedTokenizer.__call__` for details.
+ For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
+ to the right, replacing -100 by the :obj:`pad_token_id` and prepending them with the
+ :obj:`decoder_start_token_id`.
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
also be used by default.
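
The label-shifting behavior described in the added docstring text (and repeated verbatim in the speech and vision docstrings below) is easiest to see in code. A minimal sketch, assuming a helper along the lines of the `shift_tokens_right` functions used in transformers seq2seq models; the library's actual helper may differ in signature:

    import torch

    def shift_tokens_right(labels: torch.Tensor, pad_token_id: int, decoder_start_token_id: int) -> torch.Tensor:
        # Shift every label one position to the right; position 0 gets the start token.
        shifted = labels.new_zeros(labels.shape)
        shifted[:, 1:] = labels[:, :-1].clone()
        shifted[:, 0] = decoder_start_token_id
        # Positions masked with -100 for the loss must become real pad tokens
        # before they are fed to the decoder.
        shifted.masked_fill_(shifted == -100, pad_token_id)
        return shifted
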
@@ -425,12 +425,14 @@ class EncoderDecoderModel(PreTrainedModel):
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert from pre-trained checkpoints
- >>> # forward
- >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
>>> # training
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
+ >>> model.config.decoder_start_token_id = tokenizer.cls_token_id
+ >>> model.config.pad_token_id = tokenizer.pad_token_id
+ >>> model.config.vocab_size = model.config.decoder.vocab_size
+ >>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt").input_ids
+ >>> labels = tokenizer("Salut, mon chien est mignon", return_tensors="pt").input_ids
+ >>> outputs = model(input_ids=input_ids, labels=labels)
>>> loss, logits = outputs.loss, outputs.logits
>>> # save and load from pretrained
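
Once the training snippet above has configured `decoder_start_token_id` and `pad_token_id`, inference is a one-liner; a hedged sketch reusing the names from the example:

    # Generate from the same inputs; generate() reads the special-token ids
    # set on model.config in the training snippet above.
    generated_ids = model.generate(input_ids)
    print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))
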

src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py

@@ -103,9 +103,9 @@ SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
:obj:`past_key_values`).
- Provide for sequence to sequence training to the decoder. Indices can be obtained using
- :class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and
- :meth:`transformers.PreTrainedTokenizer.__call__` for details.
+ For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
+ to the right, replacing -100 by the :obj:`pad_token_id` and prepending them with the
+ :obj:`decoder_start_token_id`.
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
also be used by default.
@@ -424,25 +424,19 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
Examples::
>>> from transformers import SpeechEncoderDecoderModel, Speech2Text2Processor
>>> from datasets import load_dataset
>>> import torch
>>> processor = Speech2Text2Processor.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
>>> model = SpeechEncoderDecoderModel.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
- >>> # process dataset
- >>> def map_to_array(batch):
- >>>     speech, _ = sf.read(batch["file"])
- >>>     batch["speech"] = speech
- >>>     return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
- >>> ds = ds.map(map_to_array)
- >>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
+ >>> input_values = processor(ds[0]["audio"]["array"], return_tensors="pt").input_values
>>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]])
>>> outputs = model(input_values=input_values, decoder_input_ids=decoder_input_ids)
- >>> # generation
+ >>> # inference (generation)
>>> generated = model.generate(input_values)
>>> translation = processor.batch_decode(generated)
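
The updated example only shows inference. A sketch of a training step, assuming `SpeechEncoderDecoderModel` accepts `labels` the same way `EncoderDecoderModel` does (the German target string is illustrative, not from the source):

    # Tokenize an illustrative German target and compute the seq2seq loss.
    labels = processor.tokenizer("Hallo, mein Hund ist süß", return_tensors="pt").input_ids
    loss = model(input_values=input_values, labels=labels).loss
    loss.backward()
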

src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py

@@ -113,9 +113,9 @@ VISION_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
:obj:`past_key_values`).
- Provide for sequence to sequence training to the decoder. Indices can be obtained using
- :class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and
- :meth:`transformers.PreTrainedTokenizer.__call__` for details.
+ For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
+ to the right, replacing -100 by the :obj:`pad_token_id` and prepending them with the
+ :obj:`decoder_start_token_id`.
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
also be used by default.
@@ -428,9 +428,15 @@ class VisionEncoderDecoderModel(PreTrainedModel):
>>> image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
>>> # training
- >>> pixel_values = processor(image, return_tensors="pt").pixel_values  # Batch size 1
- >>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]])
- >>> outputs = model(pixel_values=pixel_values, decoder_input_ids=decoder_input_ids)
+ >>> model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
+ >>> model.config.pad_token_id = processor.tokenizer.pad_token_id
+ >>> model.config.vocab_size = model.config.decoder.vocab_size
+ >>> pixel_values = processor(image, return_tensors="pt").pixel_values
+ >>> text = "hello world"
+ >>> labels = processor.tokenizer(text, return_tensors="pt").input_ids
+ >>> outputs = model(pixel_values=pixel_values, labels=labels)
+ >>> loss = outputs.loss
>>> # inference (generation)
>>> generated_ids = model.generate(pixel_values)
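
A natural follow-up to the generation line, assuming `processor` wraps the decoder tokenizer as the training snippet above implies:

    # Decode the generated ids back to text.
    caption = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(caption)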