[CI] remove redundant checks in test_eager_matches_sdpa_inference (#36740)

Joao Gante 2025-03-17 16:29:18 +00:00 committed by GitHub
parent e3af4fec91
commit cff4caa0c1

@@ -138,16 +138,16 @@ TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION = [
     (
         # test name for the test runner
         f"{dtype}_pad_{padding_side}{'' if use_attention_mask else '_no_attn_mask'}"
-        f"{'_output_attn' if output_attentions else ''}{'_sdpa_kernels' if enable_kernels else ''}",
+        f"{'_sdpa_kernels' if enable_kernels else ''}",
         # parameterization
-        *(dtype, padding_side, use_attention_mask, output_attentions, enable_kernels),
+        *(dtype, padding_side, use_attention_mask, False, enable_kernels),
     )
     for dtype in ("fp16", "fp32", "bf16")
     for padding_side in ("left", "right")
     for use_attention_mask in (True, False)
-    for output_attentions in (True, False)
     for enable_kernels in (True, False)
-]
+    # Extra test case: `output_attentions=True` has special attention mask handling and sdpa reverts to eager
+] + [("fp32_pad_left_output_attentions", "fp32", "left", True, True, False)]
 
 
 def _config_zero_init(config):
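The new parameterization is easy to sanity-check in isolation. A minimal sketch, assuming nothing beyond the standard library: the comprehension below mirrors the diff (with the `*(...)` splat flattened), while the assertion and prints are illustrative additions, not repository code.

# Sketch: rebuild the parameterization and inspect what it generates.
params = [
    (
        f"{dtype}_pad_{padding_side}{'' if use_attention_mask else '_no_attn_mask'}"
        f"{'_sdpa_kernels' if enable_kernels else ''}",
        dtype, padding_side, use_attention_mask, False, enable_kernels,
    )
    for dtype in ("fp16", "fp32", "bf16")
    for padding_side in ("left", "right")
    for use_attention_mask in (True, False)
    for enable_kernels in (True, False)
] + [("fp32_pad_left_output_attentions", "fp32", "left", True, True, False)]

# 3 dtypes * 2 padding sides * 2 mask flags * 2 kernel flags = 24 generated
# cases, plus the one explicit output_attentions=True case kept above.
assert len(params) == 25
print(params[0][0])   # fp16_pad_left_sdpa_kernels
print(params[-1])     # ('fp32_pad_left_output_attentions', 'fp32', 'left', True, True, False)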
@@ -3618,7 +3618,7 @@ class ModelTesterMixin:
             ("cuda", False, torch.bfloat16): 1e-2,
             ("cuda", False, torch.float16): 5e-3,
             ("cuda", True, torch.float32): 1e-4,
-            ("cuda", True, torch.bfloat16): 3e-2,
+            ("cuda", True, torch.bfloat16): 3e-2,  # (different from others)
             ("cuda", True, torch.float16): 5e-3,
         }
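For context, per-configuration tolerance tables like this one are typically consumed by a dictionary lookup when comparing eager and SDPA outputs. A minimal sketch, assuming the (device, enable_kernels, dtype) key layout inferred from the entries above; the helper name and default value are hypothetical, not from the repository.

import torch

# Hypothetical helper: pick the torch.allclose tolerance for a run configuration.
# The key layout (device, enable_kernels, dtype) is inferred from the diff above.
ATOLS = {
    ("cuda", False, torch.bfloat16): 1e-2,
    ("cuda", False, torch.float16): 5e-3,
    ("cuda", True, torch.float32): 1e-4,
    ("cuda", True, torch.bfloat16): 3e-2,  # loosest bound: bf16 with SDPA kernels enabled
    ("cuda", True, torch.float16): 5e-3,
}

def get_atol(device, enable_kernels, dtype, default=1e-6):
    # Fall back to a tight default for configurations not listed in the table.
    return ATOLS.get((device, enable_kernels, dtype), default)

# e.g. get_atol("cuda", True, torch.bfloat16) -> 3e-2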