Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 02:02:21 +06:00
Update document of WhisperDecoderLayer (#21621)
* Update document of WhisperDecoderLayer
* Update modeling_mbart.py
* Update doc with utils/check_copies.py --fix_and_overwrite
* Update modeling_xlm_prophetnet.py
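The corrected docstrings reflect the batch-first convention these decoder layers actually use. Below is a minimal sketch (assuming a recent transformers release; WhisperDecoderLayer is an internal class, so its constructor and forward signature may differ between versions) showing that the layer consumes and returns hidden states of shape `(batch, seq_len, embed_dim)`, matching the updated documentation:

import torch
from transformers import WhisperConfig
from transformers.models.whisper.modeling_whisper import WhisperDecoderLayer

config = WhisperConfig()                            # default (small) configuration
layer = WhisperDecoderLayer(config)                 # internal class; API may change between releases
hidden_states = torch.randn(2, 7, config.d_model)   # (batch, seq_len, embed_dim), batch-first
outputs = layer(hidden_states)
print(outputs[0].shape)                             # same (batch, seq_len, embed_dim) shape as the input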
parent: 61abe3290b
commit: 212c42a1e3
@@ -1483,11 +1483,11 @@ class BigBirdPegasusDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -390,11 +390,11 @@ class BlenderbotDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -458,11 +458,11 @@ class M2M100DecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -397,11 +397,11 @@ class MBartDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -405,11 +405,11 @@ class PegasusDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -465,11 +465,11 @@ class Speech2TextDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -372,11 +372,11 @@ class WhisperDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
@@ -430,11 +430,11 @@ class XGLMDecoderLayer(nn.Module):
     ) -> torch.Tensor:
         """
         Args:
-            hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
             attention_mask (`torch.FloatTensor`): attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             encoder_hidden_states (`torch.FloatTensor`):
-                cross attention input to the layer of shape `(seq_len, batch, embed_dim)`
+                cross attention input to the layer of shape `(batch, seq_len, embed_dim)`
             encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size
                 `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
             layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size
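For completeness, the attention_mask layout referenced throughout these docstrings is an additive mask of shape `(batch, 1, tgt_len, src_len)` in which padded positions hold a very large negative value so they vanish after the softmax. A plain-PyTorch illustration with example sizes (not repository code):

import torch

batch, tgt_len, src_len = 2, 4, 4
src_padding = torch.tensor([[False, False, False, True],    # True marks a padded source token
                            [False, False, True, True]])
attention_mask = torch.zeros(batch, 1, tgt_len, src_len)
attention_mask = attention_mask.masked_fill(src_padding[:, None, None, :], torch.finfo(torch.float32).min)
print(attention_mask.shape)   # torch.Size([2, 1, 4, 4]); padded columns hold ~ -3.4e38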