remove unused use_cache in config classes (#20844)

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in: parent d0bfdd20f4, commit 2280880cb7
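The pattern cleaned up here is the same in all three configs: `use_cache` was accepted in `__init__` and stored on the instance, but nothing in the corresponding modeling code ever read it. A minimal sketch of the before/after shape of the change (the `MinimalConfigBefore`/`MinimalConfigAfter` classes and their fields are illustrative only, not code from the repository):

from transformers import PretrainedConfig

class MinimalConfigBefore(PretrainedConfig):
    # Before: use_cache is accepted and stored, but no model code reads it.
    def __init__(self, hidden_size=768, use_cache=True, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.use_cache = use_cache  # dead attribute

class MinimalConfigAfter(PretrainedConfig):
    # After: the unused argument and attribute are simply dropped.
    def __init__(self, hidden_size=768, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size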
src/transformers/models/canine/configuration_canine.py

@@ -104,7 +104,6 @@ class CanineConfig(PretrainedConfig):
         type_vocab_size=16,
         initializer_range=0.02,
         layer_norm_eps=1e-12,
-        use_cache=True,
         pad_token_id=0,
         bos_token_id=0xE000,
         eos_token_id=0xE001,
@@ -128,7 +127,6 @@ class CanineConfig(PretrainedConfig):
         self.initializer_range = initializer_range
         self.type_vocab_size = type_vocab_size
         self.layer_norm_eps = layer_norm_eps
-        self.use_cache = use_cache

         # Character config:
         self.downsampling_rate = downsampling_rate
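After this change a freshly constructed CanineConfig should no longer carry the attribute by default. A quick sanity check, under the assumption that `use_cache` is not set anywhere in `PretrainedConfig` itself:

from transformers import CanineConfig

config = CanineConfig()
# With the parameter removed, the default config no longer exposes use_cache.
print(hasattr(config, "use_cache"))  # expected: False after this commit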
src/transformers/models/lilt/configuration_lilt.py

@@ -70,9 +70,6 @@ class LiltConfig(PretrainedConfig):
             [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
             with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`.
         classifier_dropout (`float`, *optional*):
             The dropout ratio for the classification head.
         channel_shrink_ratio (`int`, *optional*, defaults to 4):
@@ -111,7 +108,6 @@ class LiltConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         pad_token_id=0,
         position_embedding_type="absolute",
-        use_cache=True,
         classifier_dropout=None,
         channel_shrink_ratio=4,
         max_2d_position_embeddings=1024,
@@ -132,7 +128,6 @@ class LiltConfig(PretrainedConfig):
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
-        self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         self.channel_shrink_ratio = channel_shrink_ratio
         self.max_2d_position_embeddings = max_2d_position_embeddings
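For LiLT the docstring entry goes away together with the argument, so downstream code should not assume the attribute exists on every version. A defensive read is one way to stay compatible on both sides of this commit (a generic sketch, not code from the repository):

from transformers import LiltConfig

config = LiltConfig()
# Read use_cache with a fallback so the same code works whether or not
# the config class still defines the attribute.
use_cache = getattr(config, "use_cache", False)
print(use_cache)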
src/transformers/models/longformer/configuration_longformer.py

@@ -92,9 +92,6 @@ class LongformerConfig(PretrainedConfig):
             [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
             with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`.
         classifier_dropout (`float`, *optional*):
             The dropout ratio for the classification head.
         attention_window (`int` or `List[int]`, *optional*, defaults to 512):
@@ -137,7 +134,6 @@ class LongformerConfig(PretrainedConfig):
         initializer_range: float = 0.02,
         layer_norm_eps: float = 1e-12,
         position_embedding_type: str = "absolute",
-        use_cache: bool = True,
         classifier_dropout: float = None,
         onnx_export: bool = False,
         **kwargs
@@ -162,7 +158,6 @@ class LongformerConfig(PretrainedConfig):
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
-        self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         self.onnx_export = onnx_export
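Serialized configs that still contain a `use_cache` entry should continue to load: to the best of my understanding, `PretrainedConfig.__init__` stores unrecognized keyword arguments as plain attributes, so an old value is kept even though the subclass no longer declares the parameter. A small sketch under that assumption:

from transformers import LongformerConfig

# use_cache is no longer a declared parameter, but it falls through **kwargs
# and (assuming the usual PretrainedConfig handling of unknown kwargs) is
# still set on the instance.
config = LongformerConfig(use_cache=False)
print(getattr(config, "use_cache", "not set"))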