mirror of https://github.com/huggingface/transformers.git
Update missing docs on activation_dropout and fix dropout docs for SEW-D (#26031)

* add missing doc for activation dropout
* fix doc for SEW-D dropout
* deprecate hidden_dropout for SEW-D
parent 0c67a72c9a
commit 18ee1fe762
@@ -58,6 +58,8 @@ class Data2VecAudioConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
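All of the docstring hunks in this commit document the same four dropout knobs, so the newly described activation_dropout can be set alongside the others when building any of these configs. A minimal sketch with Data2VecAudioConfig (assuming transformers is installed; the values are illustrative, the defaults are 0.1):

from transformers import Data2VecAudioConfig

config = Data2VecAudioConfig(
    hidden_dropout=0.1,       # fully connected layers in the embeddings, encoder and pooler
    activation_dropout=0.05,  # activations inside the fully connected (feed-forward) layers
    attention_dropout=0.1,    # attention probabilities
    final_dropout=0.0,        # final projection layer, e.g. of the CTC head
)
print(config.activation_dropout)  # 0.05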
@@ -58,6 +58,8 @@ class HubertConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout(`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout(`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
@@ -59,6 +59,8 @@ class SEWConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
@@ -72,6 +72,8 @@ class SEWDConfig(PretrainedConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
             `"relu"`, `"selu"`, `"gelu_python"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
-            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+            Deprecated. Not used by the model and will be removed in a future version.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
@@ -238,7 +240,7 @@ class SEWDConfig(PretrainedConfig):
         self.pos_att_type = list(pos_att_type)
         self.hidden_act = hidden_act
         self.num_attention_heads = num_attention_heads
-        self.hidden_dropout = hidden_dropout
+        self._hidden_dropout = hidden_dropout
         self.attention_dropout = attention_dropout
         self.activation_dropout = activation_dropout
         self.feat_proj_dropout = feat_proj_dropout
@@ -280,3 +282,16 @@ class SEWDConfig(PretrainedConfig):
     @property
     def inputs_to_logits_ratio(self):
         return functools.reduce(operator.mul, self.conv_stride, 1)
+
+    @property
+    def hidden_dropout(self):
+        logger.warning_once("hidden_dropout is not used by the model and will be removed as config attribute in v4.35")
+        return self._hidden_dropout
+
+    def to_dict(self):
+        """
+        Serializes this instance to a Python dictionary.
+        """
+        output = super().to_dict()
+        output["hidden_dropout"] = output.pop("_hidden_dropout")
+        return output
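Taken together, the two SEW-D hunks turn hidden_dropout into a pure backward-compatibility shim: the constructor still accepts it, the value is stored as _hidden_dropout, reading config.hidden_dropout goes through the warning property, and to_dict() writes it back under the public key so saved configs keep the old field. A short usage sketch (assuming transformers is installed; values are illustrative):

from transformers import SEWDConfig

config = SEWDConfig(activation_dropout=0.05, hidden_dropout=0.1)

print(config.activation_dropout)  # 0.05 -- what the model actually applies to its fully connected layers
print(config.hidden_dropout)      # 0.1 -- logs the "removed in v4.35" warning once, then returns the stored value

serialized = config.to_dict()
assert "hidden_dropout" in serialized       # still serialized under the public name
assert "_hidden_dropout" not in serialized  # the private attribute does not leak into the dict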
@@ -61,6 +61,8 @@ class UniSpeechConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
@@ -62,6 +62,8 @@ class UniSpeechSatConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
@@ -59,6 +59,8 @@ class Wav2Vec2Config(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
@@ -61,6 +61,8 @@ class Wav2Vec2ConformerConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
@@ -58,6 +58,8 @@ class WavLMConfig(PretrainedConfig):
             `"relu"`, `"selu"` and `"gelu_new"` are supported.
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        activation_dropout (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for activations inside the fully connected layer.
         attention_dropout (`float`, *optional*, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         final_dropout (`float`, *optional*, defaults to 0.1):
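The SEW-D changes follow a general recipe for retiring a config attribute without breaking existing code or saved configs: keep accepting the old keyword, store it under a private name, expose it through a property that warns, and map the private key back to the public one during serialization. A self-contained sketch of that pattern (illustrative class and names, not part of transformers):

import warnings


class ExampleConfig:
    """Minimal illustration of deprecating a config attribute while keeping serialization stable."""

    def __init__(self, old_option=0.1, new_option=0.1):
        # Store the deprecated value privately so attribute access can be intercepted.
        self._old_option = old_option
        self.new_option = new_option

    @property
    def old_option(self):
        warnings.warn("old_option is unused and will be removed in a future version", FutureWarning)
        return self._old_option

    def to_dict(self):
        output = dict(self.__dict__)
        # Serialize under the public name so previously saved configs keep working.
        output["old_option"] = output.pop("_old_option")
        return output


config = ExampleConfig(old_option=0.2)
assert config.old_option == 0.2              # warns, then returns the stored value
assert config.to_dict()["old_option"] == 0.2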