mirror of https://github.com/huggingface/transformers.git
synced 2025-07-14 10:08:29 +06:00
[tests] remove test_sdpa_equivalence (redundant) (#37911)

* rm test_sdpa_equivalence
* make fixup

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
parent ea29f61ed9
commit 40a493c7ed
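Every override deleted below compared a model loaded with attn_implementation="eager" against the same weights loaded with attn_implementation="sdpa" and required the outputs to match within a tolerance. As a minimal, self-contained illustration of that notion of equivalence (not part of this commit), the sketch below compares torch.nn.functional.scaled_dot_product_attention with a hand-written softmax attention; the two kernels agree only up to floating-point differences, which is why the removed tests relied on loose atol values.

# Minimal sketch (illustration only, not code from this commit):
# "eager" softmax attention vs. the fused SDPA kernel.
import math

import torch
import torch.nn.functional as F

torch.manual_seed(0)
batch, heads, seq_len, head_dim = 2, 4, 16, 32
q = torch.randn(batch, heads, seq_len, head_dim)
k = torch.randn(batch, heads, seq_len, head_dim)
v = torch.randn(batch, heads, seq_len, head_dim)

# Eager attention: explicit matmul + softmax, used here as the reference.
scores = q @ k.transpose(-2, -1) / math.sqrt(head_dim)
eager_out = torch.softmax(scores, dim=-1) @ v

# Fused SDPA kernel.
sdpa_out = F.scaled_dot_product_attention(q, k, v)

# Equivalent up to floating-point/kernel differences, not bit-identical.
torch.testing.assert_close(sdpa_out, eager_out, atol=1e-4, rtol=1e-4)
print("max abs diff:", (sdpa_out - eager_out).abs().max().item())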
@@ -297,10 +297,6 @@ class AyaVisionModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Cohere2's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @unittest.skip(reason="SiglipVisionModel does not support standalone training")
     def test_training(self):
         pass
@@ -127,10 +127,6 @@ class Cohere2ModelTest(CohereModelTest, unittest.TestCase):
     def test_generate_continue_from_inputs_embeds(self):
         pass

-    @unittest.skip("Cohere2's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @slow
     @require_read_token
@@ -300,10 +300,6 @@ class DeepseekV3ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
     def test_generate_continue_from_inputs_embeds(self):
         pass

-    @unittest.skip("DeepseekV3's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @unittest.skip("Deepseek-V3 uses MLA so it is not compatible with the standard cache format")
     def test_beam_search_generate_dict_outputs_use_cache(self):
         pass
@@ -303,38 +303,6 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Gemma flash attention does not support right padding")

-    @require_torch_sdpa
-    @require_torch_accelerator
-    @slow
-    def test_sdpa_equivalence(self):
-        for model_class in self.all_model_classes:
-            if not model_class._supports_sdpa:
-                self.skipTest(reason="Model does not support SDPA")
-
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            model = model_class(config)
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname)
-                model_sdpa = model_class.from_pretrained(
-                    tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa"
-                )
-                model_sdpa.to(torch_device)
-
-                model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="eager")
-                model.to(torch_device)
-
-                dummy_input = inputs_dict[model_class.main_input_name]
-                dummy_input = dummy_input.to(torch_device)
-                outputs = model(dummy_input, output_hidden_states=True)
-                outputs_sdpa = model_sdpa(dummy_input, output_hidden_states=True)
-
-                logits = outputs.hidden_states[-1]
-                logits_sdpa = outputs_sdpa.hidden_states[-1]
-
-                # gemma sdpa needs a high tolerance
-                assert torch.allclose(logits_sdpa, logits, atol=3e-3)
-
     @require_flash_attn
     @require_torch_gpu
     @pytest.mark.flash_attn_test
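The Gemma override removed above can be reproduced outside the test harness. The following standalone sketch applies the same save/reload pattern to a randomly initialised, tiny GemmaForCausalLM on CPU; the config sizes and the float32 tolerance are illustrative choices for this example, not values taken from the repository.

# Standalone sketch of the deleted check: the same random weights are loaded with
# attn_implementation="eager" and "sdpa", then the last hidden states are compared.
# The tiny config values below are illustrative, not taken from the commit.
import tempfile

import torch
from transformers import GemmaConfig, GemmaForCausalLM

config = GemmaConfig(
    vocab_size=128,
    hidden_size=32,
    intermediate_size=64,
    num_hidden_layers=2,
    num_attention_heads=4,
    num_key_value_heads=2,
    head_dim=8,
)
model = GemmaForCausalLM(config)

with tempfile.TemporaryDirectory() as tmpdirname:
    model.save_pretrained(tmpdirname)
    model_eager = GemmaForCausalLM.from_pretrained(tmpdirname, attn_implementation="eager")
    model_sdpa = GemmaForCausalLM.from_pretrained(tmpdirname, attn_implementation="sdpa")

input_ids = torch.randint(0, config.vocab_size, (1, 16))
with torch.no_grad():
    hidden_eager = model_eager(input_ids, output_hidden_states=True).hidden_states[-1]
    hidden_sdpa = model_sdpa(input_ids, output_hidden_states=True).hidden_states[-1]

# The removed Gemma test used atol=3e-3 in float16; float32 on CPU can be tighter.
assert torch.allclose(hidden_sdpa, hidden_eager, atol=1e-3)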
@@ -143,10 +143,6 @@ class Gemma2ModelTest(GemmaModelTest, unittest.TestCase):
     def test_generate_continue_from_inputs_embeds(self):
         pass

-    @unittest.skip("Gemma2's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @unittest.skip(
         reason="HybridCache can't be gathered because it is not iterable. Adding a simple iter and dumping `distributed_iterator`"
         " as in Dynamic Cache doesn't work. NOTE: @gante all cache objects would need better compatibility with multi gpu setting"
@@ -28,7 +28,6 @@ from transformers.testing_utils import (
     require_torch,
     require_torch_accelerator,
     require_torch_gpu,
-    require_torch_sdpa,
     slow,
     torch_device,
 )
@@ -102,38 +101,6 @@ class NemotronModelTest(GemmaModelTest):
     def test_model_outputs_equivalence(self, **kwargs):
         pass

-    @require_torch_sdpa
-    @require_torch_accelerator
-    @slow
-    def test_sdpa_equivalence(self):
-        for model_class in self.all_model_classes:
-            if not model_class._supports_sdpa:
-                self.skipTest(reason="Model does not support SDPA")
-
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            model = model_class(config)
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname)
-                model_sdpa = model_class.from_pretrained(
-                    tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa"
-                )
-                model_sdpa.to(torch_device)
-
-                model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="eager")
-                model.to(torch_device)
-
-                dummy_input = inputs_dict[model_class.main_input_name]
-                dummy_input = dummy_input.to(torch_device)
-                outputs = model(dummy_input, output_hidden_states=True)
-                outputs_sdpa = model_sdpa(dummy_input, output_hidden_states=True)
-
-                logits = outputs.hidden_states[-1]
-                logits_sdpa = outputs_sdpa.hidden_states[-1]
-
-                # nemotron sdpa needs a high tolerance
-                assert torch.allclose(logits_sdpa, logits, atol=1e-2)
-
     @require_flash_attn
     @require_torch_gpu
     @pytest.mark.flash_attn_test
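The Gemma and Nemotron copies above differ only in their hard-coded tolerance (atol=3e-3 versus atol=1e-2), which is the usual sign that a single shared test with an overridable knob suffices; that is the sense in which these overrides are redundant. The sketch below shows that refactoring pattern in isolation. It is an illustration only, not the actual shared test in transformers' ModelTesterMixin.

# Illustration only: folding per-model copies into one shared test with an
# overridable tolerance, instead of re-implementing the check in every subclass.
import unittest

import torch


class SdpaEquivalenceTestMixin:
    # Subclasses only override the tolerance (or skip), not the whole test body.
    sdpa_equivalence_atol = 1e-3

    def get_eager_and_sdpa_outputs(self):
        raise NotImplementedError  # provided by the concrete model test class

    def test_sdpa_equivalence(self):
        eager_out, sdpa_out = self.get_eager_and_sdpa_outputs()
        torch.testing.assert_close(sdpa_out, eager_out, atol=self.sdpa_equivalence_atol, rtol=0.0)


class DummyGemmaLikeTest(SdpaEquivalenceTestMixin, unittest.TestCase):
    sdpa_equivalence_atol = 3e-3  # the tolerance the removed Gemma copy hard-coded

    def get_eager_and_sdpa_outputs(self):
        # Stand-in tensors; a real test would run the model twice as shown above.
        out = torch.randn(2, 8)
        return out, out + 1e-4


if __name__ == "__main__":
    unittest.main()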