Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 02:02:21 +06:00
[fix gemma] Set default value for output_attentions parameter in Gemma2 and Gemma3 models (#37633)

* Set default value for output_attentions parameter in Gemma2 and Gemma3 models
* update
* fix
* fix

---------

Co-authored-by: chenin <wangzhichen@encosmart.com>
This commit is contained in:
parent 31ea547b7a
commit 006530d285
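The change itself is one line repeated across four files: the `output_attentions` parameter of the causal-mask helper gains a default of `False`, so internal callers that omit the flag no longer fail with a missing-argument TypeError. Below is a minimal sketch of the failure mode and the fix; the method name `_update_causal_mask` and the call site are illustrative assumptions, since only the parameter list appears in the hunks.

import torch

class Sketch:
    # Assumed shape of the patched method: the parameters mirror the diff
    # context, but the method name itself is an assumption.
    def _update_causal_mask(
        self,
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values,                  # HybridCache in the real models
        output_attentions: bool = False,  # the fix: default value added
    ):
        # Body elided; the models use this flag to decide whether the full
        # attention mask has to be materialized.
        return output_attentions

m = Sketch()
# Before the fix, omitting the flag raised:
#   TypeError: _update_causal_mask() missing 1 required positional
#   argument: 'output_attentions'
m._update_causal_mask(
    input_tensor=torch.zeros(1, 4, 8),
    cache_position=torch.arange(4),
    past_key_values=None,
)

A default of `False` also matches the library-wide default of `output_attentions` in `PretrainedConfig`, so callers that previously passed `False` explicitly see no behavior change.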
@@ -660,7 +660,7 @@ class Cohere2Model(Cohere2PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Cohere2 work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape

@@ -673,7 +673,7 @@ class Gemma2Model(Gemma2PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma2 work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape

@@ -540,7 +540,7 @@ class Gemma2Model(GemmaModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma2 work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape

@@ -758,7 +758,7 @@ class Gemma3TextModel(Gemma3PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma3Text work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape