mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Fix EncoderDecoderModel docs (#14197)
* Fix docs * Apply suggestions from review + fix bug
This commit is contained in:
parent
ac12a5ae47
commit
5f3bf65111
@ -98,9 +98,9 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r"""
|
||||
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
|
||||
:obj:`past_key_values`).
|
||||
|
||||
Provide for sequence to sequence training to the decoder. Indices can be obtained using
|
||||
:class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and
|
||||
:meth:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
|
||||
to the right, replacing -100 by the :obj:`pad_token_id` and prepending them with the
|
||||
:obj:`decoder_start_token_id`.
|
||||
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
|
||||
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
|
||||
also be used by default.
|
||||
@ -425,12 +425,14 @@ class EncoderDecoderModel(PreTrainedModel):
|
||||
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert from pre-trained checkpoints
|
||||
|
||||
>>> # forward
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
|
||||
|
||||
>>> # training
|
||||
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
|
||||
>>> model.config.decoder_start_token_id = tokenizer.cls_token_id
|
||||
>>> model.config.pad_token_id = tokenizer.pad_token_id
|
||||
>>> model.config.vocab_size = model.config.decoder.vocab_size
|
||||
|
||||
>>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt").input_ids
|
||||
>>> labels = tokenizer("Salut, mon chien est mignon", return_tensors="pt").input_ids
|
||||
>>> outputs = model(input_ids=input_ids, labels=labels)
|
||||
>>> loss, logits = outputs.loss, outputs.logits
|
||||
|
||||
>>> # save and load from pretrained
|
||||
|
@ -103,9 +103,9 @@ SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
|
||||
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
|
||||
:obj:`past_key_values`).
|
||||
|
||||
Provide for sequence to sequence training to the decoder. Indices can be obtained using
|
||||
:class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and
|
||||
:meth:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
|
||||
to the right, replacing -100 by the :obj:`pad_token_id` and prepending them with the
|
||||
:obj:`decoder_start_token_id`.
|
||||
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
|
||||
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
|
||||
also be used by default.
|
||||
@ -424,25 +424,19 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
|
||||
Examples::
|
||||
|
||||
>>> from transformers import SpeechEncoderDecoderModel, Speech2Text2Processor
|
||||
>>> from datasets import load_dataset
|
||||
>>> import torch
|
||||
|
||||
>>> processor = Speech2Text2Processor.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
|
||||
>>> model = SpeechEncoderDecoderModel.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
|
||||
|
||||
>>> # process dataset
|
||||
>>> def map_to_array(batch):
|
||||
>>> speech, _ = sf.read(batch["file"])
|
||||
>>> batch["speech"] = speech
|
||||
>>> return batch
|
||||
|
||||
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
>>> ds = ds.map(map_to_array)
|
||||
|
||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||
>>> input_values = processor(ds[0]["audio"]["array"], return_tensors="pt").input_values
|
||||
>>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]])
|
||||
>>> outputs = model(input_values=input_values, decoder_input_ids=decoder_input_ids)
|
||||
|
||||
>>> # generation
|
||||
>>> # inference (generation)
|
||||
>>> generated = model.generate(input_values)
|
||||
>>> translation = processor.batch_decode(generated)
|
||||
|
||||
|
@ -113,9 +113,9 @@ VISION_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
|
||||
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
|
||||
:obj:`past_key_values`).
|
||||
|
||||
Provide for sequence to sequence training to the decoder. Indices can be obtained using
|
||||
:class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and
|
||||
:meth:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
|
||||
to the right, replacing -100 by the :obj:`pad_token_id` and prepending them with the
|
||||
:obj:`decoder_start_token_id`.
|
||||
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
|
||||
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
|
||||
also be used by default.
|
||||
@ -428,9 +428,15 @@ class VisionEncoderDecoderModel(PreTrainedModel):
|
||||
>>> image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
|
||||
|
||||
>>> # training
|
||||
>>> pixel_values = processor(image, return_tensors="pt").pixel_values # Batch size 1
|
||||
>>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]])
|
||||
>>> outputs = model(pixel_values=pixel_values, decoder_input_ids=decoder_input_ids)
|
||||
>>> model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
|
||||
>>> model.config.pad_token_id = processor.tokenizer.pad_token_id
|
||||
>>> model.config.vocab_size = model.config.decoder.vocab_size
|
||||
|
||||
>>> pixel_values = processor(image, return_tensors="pt").pixel_values
|
||||
>>> text = "hello world"
|
||||
>>> labels = processor.tokenizer(text, return_tensors="pt").input_ids
|
||||
>>> outputs = model(pixel_values=pixel_values, labels=labels)
|
||||
>>> loss = outputs.loss
|
||||
|
||||
>>> # inference (generation)
|
||||
>>> generated_ids = model.generate(pixel_values)
|
||||
|
Loading…
Reference in New Issue
Block a user