mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-03 03:31:05 +06:00
Change default value of attn_temperature_tuning
(#37501)
fix: change default value of `attn_temperature_tuning`
This commit is contained in:
parent
c8e0e603de
commit
d6ac923ad9
@ -228,7 +228,9 @@ class Llama4TextConfig(PretrainedConfig):
|
|||||||
no_rope_layer_interval (`int`, *optional*, defaults to 4): TODO
|
no_rope_layer_interval (`int`, *optional*, defaults to 4): TODO
|
||||||
attention_chunk_size (`int`, *optional*, defaults to 8192):
|
attention_chunk_size (`int`, *optional*, defaults to 8192):
|
||||||
<TODO>
|
<TODO>
|
||||||
attn_temperature_tuning (`int`, *optional*, defaults to 4): TODO
|
attn_temperature_tuning (`bool`, *optional*, defaults to `True`):
|
||||||
|
Whether to dynamically scale the attention temperature for each query token based on sequence length.
|
||||||
|
Recommended for long sequences (e.g., >32k tokens) to maintain stable output results.
|
||||||
floor_scale (`int`, *optional*, defaults to 8192): TODO
|
floor_scale (`int`, *optional*, defaults to 8192): TODO
|
||||||
attn_scale (`float`, *optional*, defaults to 0.1): TODO
|
attn_scale (`float`, *optional*, defaults to 0.1): TODO
|
||||||
cache_implementation (`<fill_type>`, *optional*, defaults to `"hybrid"`): <fill_docstring>
|
cache_implementation (`<fill_type>`, *optional*, defaults to `"hybrid"`): <fill_docstring>
|
||||||
@ -291,7 +293,7 @@ class Llama4TextConfig(PretrainedConfig):
|
|||||||
no_rope_layers=None,
|
no_rope_layers=None,
|
||||||
no_rope_layer_interval=4,
|
no_rope_layer_interval=4,
|
||||||
attention_chunk_size=8192,
|
attention_chunk_size=8192,
|
||||||
attn_temperature_tuning=4,
|
attn_temperature_tuning=True,
|
||||||
floor_scale=8192,
|
floor_scale=8192,
|
||||||
attn_scale=0.1,
|
attn_scale=0.1,
|
||||||
cache_implementation="hybrid",
|
cache_implementation="hybrid",
|
||||||
|
Loading…
Reference in New Issue
Block a user