clean up unused classifier_dropout in config (#20596)

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Author: Yih-Dar
Date: 2022-12-05 18:04:33 +01:00 (committed by GitHub)
parent eefae413d1
commit 4430b91298
6 changed files with 0 additions and 24 deletions
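
Each file below loses the same four lines: the two docstring lines describing `classifier_dropout`, the `__init__` keyword, and the attribute assignment. A minimal audit sketch (assuming a transformers build that includes this commit) confirming the attribute is gone from all six configs:

    from transformers import (
        BlenderbotConfig,
        BlenderbotSmallConfig,
        ChineseCLIPTextConfig,
        MarianConfig,
        PegasusConfig,
        Speech2Text2Config,
    )

    # None of the cleaned-up configs should expose the dead attribute anymore
    # when instantiated with their defaults.
    for cls in (BlenderbotConfig, BlenderbotSmallConfig, ChineseCLIPTextConfig,
                MarianConfig, PegasusConfig, Speech2Text2Config):
        assert not hasattr(cls(), "classifier_dropout"), cls.__name__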

--- a/src/transformers/models/blenderbot/configuration_blenderbot.py
+++ b/src/transformers/models/blenderbot/configuration_blenderbot.py

@@ -71,8 +71,6 @@ class BlenderbotConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 128):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -131,7 +129,6 @@ class BlenderbotConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=1,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         bos_token_id=1,
@@ -156,7 +153,6 @@ class BlenderbotConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
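
Blenderbot's config (like the Marian, Pegasus, and Speech2Text2 ones below) appears templated on BART, where `classifier_dropout` is live because `BartForSequenceClassification` feeds it into its classification head; Blenderbot ships no such head, so the field was never read. A short sketch of the contrast:

    from transformers import BartConfig, BlenderbotConfig

    # BART keeps the field: its sequence-classification head applies
    # config.classifier_dropout before the final projection.
    print(BartConfig().classifier_dropout)  # 0.0

    # Blenderbot has no classification head, so its config drops the field.
    print(hasattr(BlenderbotConfig(), "classifier_dropout"))  # False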

--- a/src/transformers/models/blenderbot_small/configuration_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/configuration_blenderbot_small.py

@@ -71,8 +71,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 512):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -131,7 +129,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=1,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         bos_token_id=1,
@@ -155,7 +152,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True

--- a/src/transformers/models/chinese_clip/configuration_chinese_clip.py
+++ b/src/transformers/models/chinese_clip/configuration_chinese_clip.py

@@ -87,8 +87,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models). Only
             relevant if `config.is_decoder=True`.
-        classifier_dropout (`float`, *optional*):
-            The dropout ratio for the classification head.
 
     Example:
 
@@ -124,7 +122,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         pad_token_id=0,
         position_embedding_type="absolute",
         use_cache=True,
-        classifier_dropout=None,
         **kwargs
     ):
         super().__init__(pad_token_id=pad_token_id, **kwargs)
@@ -144,7 +141,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
         self.use_cache = use_cache
-        self.classifier_dropout = classifier_dropout
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
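
`ChineseCLIPTextConfig` mirrors `BertConfig`, and in BERT the field is genuinely consumed: the classification heads fall back to `hidden_dropout_prob` when `classifier_dropout` is `None`. The Chinese-CLIP text tower is used purely as an encoder, so the copied field was dead weight. A sketch of the asymmetry:

    from transformers import BertConfig, ChineseCLIPTextConfig

    # BertConfig keeps classifier_dropout because heads such as
    # BertForSequenceClassification read it (None means "fall back to
    # hidden_dropout_prob").
    print(BertConfig().classifier_dropout)  # None

    # The Chinese-CLIP text encoder has no classification head; the
    # copied-over field is now gone.
    print(hasattr(ChineseCLIPTextConfig(), "classifier_dropout"))  # False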

--- a/src/transformers/models/marian/configuration_marian.py
+++ b/src/transformers/models/marian/configuration_marian.py

@@ -69,8 +69,6 @@ class MarianConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -130,7 +128,6 @@ class MarianConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=58100,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=58100,
         eos_token_id=0,
@@ -155,7 +152,6 @@ class MarianConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True

--- a/src/transformers/models/pegasus/configuration_pegasus.py
+++ b/src/transformers/models/pegasus/configuration_pegasus.py

@@ -64,8 +64,6 @@ class PegasusConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -124,7 +122,6 @@ class PegasusConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=0,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         eos_token_id=1,
@@ -147,7 +144,6 @@ class PegasusConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True

--- a/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py
+++ b/src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py

@@ -60,8 +60,6 @@ class Speech2Text2Config(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         init_std (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
         https://arxiv.org/abs/1909.11556>`__ for more details.
@@ -109,7 +107,6 @@ class Speech2Text2Config(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=2,
-        classifier_dropout=0.0,
         scale_embedding=True,
         pad_token_id=1,
         bos_token_id=0,
@@ -129,7 +126,6 @@ class Speech2Text2Config(PretrainedConfig):
         self.activation_function = activation_function
         self.init_std = init_std
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = decoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
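
Since every `__init__` above still ends in `**kwargs`, and `PretrainedConfig` stores unrecognized keys as plain attributes, older `config.json` files that serialized `classifier_dropout` should keep loading unchanged; the value simply goes unread. A hedged sketch of that behavior:

    from transformers import MarianConfig

    # A legacy checkpoint's config.json may still carry the key; it is
    # absorbed by **kwargs and kept as an ordinary, unused attribute.
    legacy = MarianConfig(classifier_dropout=0.1)
    print(legacy.classifier_dropout)  # 0.1, but no Marian layer reads it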