Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 02:02:21 +06:00
[fix gemma] Set default value for output_attentions parameter in Gemma2 and Gemma3 models (#37633)

* Set default value for output_attentions parameter in Gemma2 and Gemma3 models
* update
* fix
* fix

---------

Co-authored-by: chenin <wangzhichen@encosmart.com>
This commit is contained in:
parent 31ea547b7a
commit 006530d285
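The change itself is one line repeated across four files: the `output_attentions` parameter of the causal-mask helper gains a default of `False`, so internal callers that omit the flag no longer fail with a missing-argument TypeError. Below is a minimal sketch of the failure mode and the fix; the method name `_update_causal_mask` and the call site are illustrative assumptions, since only the parameter list appears in the hunks.

import torch

class Sketch:
    # Assumed shape of the patched method: the parameters mirror the diff
    # context, but the method name itself is an assumption.
    def _update_causal_mask(
        self,
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values,                  # HybridCache in the real models
        output_attentions: bool = False,  # the fix: default value added
    ):
        # Body elided; the models use this flag to decide whether the full
        # attention mask has to be materialized.
        return output_attentions

m = Sketch()
# Before the fix, omitting the flag raised:
#   TypeError: _update_causal_mask() missing 1 required positional
#   argument: 'output_attentions'
m._update_causal_mask(
    input_tensor=torch.zeros(1, 4, 8),
    cache_position=torch.arange(4),
    past_key_values=None,
)

A default of `False` also matches the library-wide default of `output_attentions` in `PretrainedConfig`, so callers that previously passed `False` explicitly see no behavior change.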
@@ -660,7 +660,7 @@ class Cohere2Model(Cohere2PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Cohere2 work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape

@@ -673,7 +673,7 @@ class Gemma2Model(Gemma2PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma2 work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape

@@ -540,7 +540,7 @@ class Gemma2Model(GemmaModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma2 work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape

@@ -758,7 +758,7 @@ class Gemma3TextModel(Gemma3PreTrainedModel):
         input_tensor: torch.Tensor,
         cache_position: torch.Tensor,
         past_key_values: HybridCache,
-        output_attentions: bool,
+        output_attentions: bool = False,
     ):
         # Flash Attention currently doesn't support static cache but Gemma3Text work only with static cache.
         # So we will pass in attention mask as is in any case, not only when ther's padding. Then we'll use its shape