Mirror of https://github.com/huggingface/transformers.git
remove unused activation dropout (#18842)
commit 9faa9f9dac (parent a26114777e)
src/transformers/models/opt/configuration_opt.py

@@ -67,8 +67,6 @@ class OPTConfig(PretrainedConfig):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
-        activation_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for activations inside the fully connected layer.
         layerdrop: (`float`, *optional*, defaults to 0.0):
             The LayerDrop probability. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) for more
             details.
@@ -106,7 +104,6 @@ class OPTConfig(PretrainedConfig):
         word_embed_proj_dim=None,
         dropout=0.1,
         attention_dropout=0.0,
-        activation_dropout=0.0,
         num_attention_heads=12,
         activation_function="relu",
         layerdrop=0.0,
@@ -132,7 +129,6 @@ class OPTConfig(PretrainedConfig):
         self.num_hidden_layers = num_hidden_layers
         self.dropout = dropout
         self.attention_dropout = attention_dropout
-        self.activation_dropout = activation_dropout
         self.activation_function = activation_function
         self.init_std = init_std
         self.layerdrop = layerdrop
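For illustration, a minimal sketch of how the configuration change surfaces to users, assuming a transformers build that includes this commit; the attribute check is an assumption based on the diff above, not documented behavior:

    from transformers import OPTConfig

    # activation_dropout is no longer an explicit OPTConfig argument, and the
    # config no longer stores it as an attribute by default.
    config = OPTConfig(dropout=0.1, attention_dropout=0.0, layerdrop=0.0)

    print(config.dropout)                         # 0.1
    print(config.attention_dropout)               # 0.0
    print(hasattr(config, "activation_dropout"))  # expected: False after this change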
src/transformers/models/opt/modeling_opt.py

@@ -281,8 +281,6 @@ class OPTDecoderLayer(nn.Module):
         self.dropout = config.dropout
         self.activation_fn = ACT2FN[config.activation_function]
 
-        self.activation_dropout = config.activation_dropout
-
         self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
         self.fc1 = nn.Linear(self.embed_dim, config.ffn_dim)
         self.fc2 = nn.Linear(config.ffn_dim, self.embed_dim)
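As the commit title says, the value was unused: the decoder layer stored self.activation_dropout but never applied it in its forward pass. For contrast, here is an illustrative sketch (not the library's code; the class and argument names are made up for the example) of a feed-forward block in which an activation-dropout setting would actually take effect, applied between the activation and the second linear layer:

    import torch
    from torch import nn


    class FeedForwardSketch(nn.Module):
        """Toy feed-forward block showing where activation dropout would be applied."""

        def __init__(self, embed_dim=16, ffn_dim=64, dropout=0.1, activation_dropout=0.1):
            super().__init__()
            self.fc1 = nn.Linear(embed_dim, ffn_dim)
            self.fc2 = nn.Linear(ffn_dim, embed_dim)
            self.activation_fn = nn.ReLU()
            self.dropout = dropout
            self.activation_dropout = activation_dropout

        def forward(self, hidden_states):
            hidden_states = self.activation_fn(self.fc1(hidden_states))
            # The call OPT's decoder layer never makes; without it, a stored
            # activation_dropout value is dead configuration.
            hidden_states = nn.functional.dropout(
                hidden_states, p=self.activation_dropout, training=self.training
            )
            hidden_states = self.fc2(hidden_states)
            return nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)


    block = FeedForwardSketch()
    out = block(torch.randn(2, 4, 16))  # (batch, sequence, embed_dim)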