[fix gemma] Set default value for output_attentions parameter in Gemma2 and Gemma3 models (#37633)

* Set default value for output_attentions parameter in Gemma2 and Gemma3 models

* update

* fix

* fix

---------

Co-authored-by: chenin <wangzhichen@encosmart.com>
chenin-wang authored 2025-04-22 17:18:17 +08:00, committed by GitHub
parent 31ea547b7a
commit 006530d285
4 changed files with 4 additions and 4 deletions

src/transformers/models/cohere2/modeling_cohere2.py

@@ -660,7 +660,7 @@ class Cohere2Model(Cohere2PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Cohere2 works only with static cache.
         # So we will pass in attention mask as is in any case, not only when there's padding. Then we'll use its shape

src/transformers/models/gemma2/modeling_gemma2.py

@@ -673,7 +673,7 @@ class Gemma2Model(Gemma2PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma2 works only with static cache.
         # So we will pass in attention mask as is in any case, not only when there's padding. Then we'll use its shape

src/transformers/models/gemma2/modular_gemma2.py

@@ -540,7 +540,7 @@ class Gemma2Model(GemmaModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma2 works only with static cache.
         # So we will pass in attention mask as is in any case, not only when there's padding. Then we'll use its shape

src/transformers/models/gemma3/modeling_gemma3.py

@@ -758,7 +758,7 @@ class Gemma3TextModel(Gemma3PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma3Text works only with static cache.
         # So we will pass in attention mask as is in any case, not only when there's padding. Then we'll use its shape
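
The same one-line change lands in all four methods: the trailing output_attentions parameter gains a default of False, presumably so call sites that omit the flag no longer raise a TypeError. As a minimal sketch of the pattern (the helper below is hypothetical and simplified, not the Transformers source):

import torch

def update_causal_mask(
    attention_mask: torch.Tensor,
    input_tensor: torch.Tensor,
    output_attentions: bool = False,  # defaulted, mirroring this commit
) -> torch.Tensor:
    # (output_attentions is accepted but unused here; it only illustrates
    # the signature change.)
    # Build a lower-triangular causal mask for the current sequence length.
    seq_len = input_tensor.shape[1]
    causal = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
    # Merge in the padding mask (1 = attend, 0 = padded position).
    return causal & attention_mask[:, None, :].bool()

hidden = torch.randn(1, 4, 8)               # (batch, seq_len, hidden_dim)
padding = torch.tensor([[1, 1, 1, 0]])      # last position is padding
mask = update_causal_mask(padding, hidden)  # flag omitted; default kicks in
print(mask.shape)                           # torch.Size([1, 4, 4])

Because output_attentions is the last parameter in each signature, adding the default is backward compatible for both keyword and positional callers.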