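optimize the code: expand 2D attention masks to 4D for every attention implementation in ModernBertModel, not only SDPA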

2025-07-31 02:02:21 +06:00 · 2025-06-08 15:34:08 +02:00 · 2025-06-08 15:34:08 +02:00 · fa6506b7c0
commit fa6506b7c0
parent 58852ee6c9
2 changed files with 4 additions and 4 deletions
--- a/src/transformers/models/modernbert/modeling_modernbert.py
+++ b/src/transformers/models/modernbert/modeling_modernbert.py
@@ -930,8 +930,8 @@ class ModernBertModel(ModernBertPreTrainedModel):
                )

        # Expand the attention mask
-        if self.config._attn_implementation == "sdpa" and attention_mask.dim() == 2:
-            # Expand the attention mask for SDPA.
+        if attention_mask.dim() == 2:
+            # Expand the attention mask
            # [bsz, seq_len] -> [bsz, 1, seq_len, seq_len]
            global_attention_mask = _prepare_4d_attention_mask(attention_mask, self.dtype, tgt_len=input_shape[1])
        else:
--- a/src/transformers/models/modernbert/modular_modernbert.py
+++ b/src/transformers/models/modernbert/modular_modernbert.py
@@ -1060,8 +1060,8 @@ class ModernBertModel(ModernBertPreTrainedModel):
                )

        # Expand the attention mask
-        if self.config._attn_implementation == "sdpa" and attention_mask.dim() == 2:
-            # Expand the attention mask for SDPA.
+        if attention_mask.dim() == 2:
+            # Expand the attention mask
            # [bsz, seq_len] -> [bsz, 1, seq_len, seq_len]
            global_attention_mask = _prepare_4d_attention_mask(attention_mask, self.dtype, tgt_len=input_shape[1])
        else: