Enable several test cases on XPU (#37516)

* enable several cases on XPU

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Update tests/test_modeling_common.py

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Author: Yao Matrix, 2025-04-16 17:01:04 +08:00 (committed by GitHub)
Commit: 33f6c5a5c8 (parent: 5ab7a7c640)
4 changed files with 10 additions and 8 deletions
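
The change pattern is the same in all four files: CUDA-only test gates (require_torch_gpu, require_torch_multi_gpu) are swapped for the accelerator-agnostic gates (require_torch_accelerator, require_torch_multi_accelerator) from transformers.testing_utils, so the same tests also run on Intel XPU; require_torch_gpu is kept only where the kernel under test is genuinely CUDA-only (the exllama test below). As a rough illustration of the resulting pattern, not a test taken from the diff, a gated test could look like this (the test class, test name, and checkpoint are illustrative assumptions):

import unittest

import torch
from transformers import AutoModelForCausalLM
from transformers.testing_utils import require_torch_accelerator, slow, torch_device


class ExampleAcceleratorTest(unittest.TestCase):
    @slow
    @require_torch_accelerator  # runs on CUDA, ROCm, or XPU, unlike @require_torch_gpu
    def test_forward_on_accelerator(self):
        # torch_device resolves to the active backend ("cuda", "xpu", ...) at runtime.
        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        model.to(torch_device)
        input_ids = torch.ones((1, 4), dtype=torch.long, device=torch_device)
        logits = model(input_ids).logits
        self.assertEqual(logits.shape[:2], (1, 4))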

File 1 of 4: FalconMamba integration tests (FalconMambaIntegrationTests)

@@ -22,6 +22,7 @@ from transformers.testing_utils import (
     require_bitsandbytes,
     require_torch,
     require_torch_accelerator,
+    require_torch_multi_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -517,7 +518,7 @@ class FalconMambaIntegrationTests(unittest.TestCase):
         self.assertListEqual(out, EXPECTED_OUTPUT)
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_training_kernel(self):
         model_id = "tiiuae/falcon-mamba-7b"

File 2 of 4: PEFT integration tests (PeftIntegrationTester)

@@ -35,7 +35,6 @@ from transformers.testing_utils import (
     require_peft,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     slow,
     torch_device,
 )
@@ -422,7 +421,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin):
         self.assertNotIn("adapter_1", model.peft_config)
         self.assertIn("adapter_2", model.peft_config)
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_bitsandbytes
     def test_peft_from_pretrained_kwargs(self):
         """

File 3 of 4: AWQ quantization tests (AwqTest / AwqFusedTest)

@@ -24,6 +24,7 @@ from transformers.testing_utils import (
     require_intel_extension_for_pytorch,
     require_torch_accelerator,
     require_torch_gpu,
+    require_torch_multi_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -202,6 +203,7 @@ class AwqTest(unittest.TestCase):
         output = quantized_model.generate(**input_ids, max_new_tokens=40)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_BF16)
+    @require_torch_gpu
     def test_quantized_model_exllama(self):
         """
         Simple test that checks if the quantized model is working properly with exllama backend
@@ -240,7 +242,7 @@ class AwqTest(unittest.TestCase):
         output = model.generate(**input_ids, max_new_tokens=40)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
@@ -275,7 +277,7 @@ class AwqTest(unittest.TestCase):
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 @require_auto_awq
 @require_accelerate
 class AwqFusedTest(unittest.TestCase):
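
The multi-device quantized-model test above is now gated by require_torch_multi_accelerator rather than require_torch_multi_gpu. As a hedged sketch of what such a gate has to establish (this helper is an illustration, not the one transformers.testing_utils actually uses), counting visible accelerators could look like:

import torch

def accelerator_count() -> int:
    # Count CUDA/ROCm devices first; ROCm builds expose devices through torch.cuda.
    if torch.cuda.is_available():
        return torch.cuda.device_count()
    # Intel XPU support lives under torch.xpu on recent PyTorch builds.
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.xpu.device_count()
    return 0

# A multi-accelerator test would then be skipped whenever accelerator_count() < 2.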

File 4 of 4: tests/test_modeling_common.py (ModelTesterMixin)

@@ -3825,7 +3825,7 @@ class ModelTesterMixin:
     )
     @require_torch_sdpa
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_sdpa_can_dispatch_on_flash(self):
         if not self.has_attentions:
@@ -3836,8 +3836,8 @@ class ModelTesterMixin:
             self.skipTest(reason="This test requires an NVIDIA GPU with compute capability >= 8.0")
         elif device_type == "rocm" and major < 9:
             self.skipTest(reason="This test requires an AMD GPU with compute capability >= 9.0")
-        else:
-            self.skipTest(reason="This test requires a Nvidia or AMD GPU")
+        elif device_type not in ["cuda", "rocm", "xpu"]:
+            self.skipTest(reason="This test requires a Nvidia or AMD GPU, or an Intel XPU")
         torch.compiler.reset()
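
For context, the skip logic in this last hunk keys off a (device_type, major) pair: CUDA needs compute capability >= 8.0, ROCm needs >= 9.0, and XPU is now accepted as-is. A rough sketch of how such a probe could be derived (this helper is an assumption for illustration, not the repository's actual implementation):

import torch

def probe_device():
    """Return (device_type, major_compute_capability_or_None) for the active backend."""
    if torch.cuda.is_available():
        major, _minor = torch.cuda.get_device_capability()
        # ROCm builds report devices through torch.cuda but set torch.version.hip.
        return ("rocm" if torch.version.hip is not None else "cuda"), major
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu", None  # compute-capability majors are a CUDA/ROCm concept
    return "cpu", None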