diff --git a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
index d724f70e19c..d34128ba067 100644
--- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
+++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
@@ -22,6 +22,7 @@ from transformers.testing_utils import (
     require_bitsandbytes,
     require_torch,
     require_torch_accelerator,
+    require_torch_multi_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -517,7 +518,7 @@ class FalconMambaIntegrationTests(unittest.TestCase):
 
         self.assertListEqual(out, EXPECTED_OUTPUT)
 
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_training_kernel(self):
         model_id = "tiiuae/falcon-mamba-7b"
diff --git a/tests/peft_integration/test_peft_integration.py b/tests/peft_integration/test_peft_integration.py
index 203124439dd..721ec7ea3f6 100644
--- a/tests/peft_integration/test_peft_integration.py
+++ b/tests/peft_integration/test_peft_integration.py
@@ -35,7 +35,6 @@ from transformers.testing_utils import (
     require_peft,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     slow,
     torch_device,
 )
@@ -422,7 +421,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin):
         self.assertNotIn("adapter_1", model.peft_config)
         self.assertIn("adapter_2", model.peft_config)
 
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_bitsandbytes
     def test_peft_from_pretrained_kwargs(self):
         """
diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py
index 95e26e18c81..d234dd408a5 100644
--- a/tests/quantization/autoawq/test_awq.py
+++ b/tests/quantization/autoawq/test_awq.py
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
     require_intel_extension_for_pytorch,
     require_torch_accelerator,
     require_torch_gpu,
+    require_torch_multi_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -202,6 +203,7 @@ class AwqTest(unittest.TestCase):
         output = quantized_model.generate(**input_ids, max_new_tokens=40)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_BF16)
 
+    @require_torch_gpu
     def test_quantized_model_exllama(self):
         """
         Simple test that checks if the quantized model is working properly with exllama backend
@@ -240,7 +242,7 @@ class AwqTest(unittest.TestCase):
         output = model.generate(**input_ids, max_new_tokens=40)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
 
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
@@ -275,7 +277,7 @@ class AwqTest(unittest.TestCase):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 @require_auto_awq
 @require_accelerate
 class AwqFusedTest(unittest.TestCase):
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 5323760fe0a..3e360c05f0a 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -3825,7 +3825,7 @@ class ModelTesterMixin:
         )
 
     @require_torch_sdpa
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_sdpa_can_dispatch_on_flash(self):
         if not self.has_attentions:
@@ -3836,8 +3836,8 @@ class ModelTesterMixin:
             self.skipTest(reason="This test requires an NVIDIA GPU with compute capability >= 8.0")
         elif device_type == "rocm" and major < 9:
             self.skipTest(reason="This test requires an AMD GPU with compute capability >= 9.0")
-        else:
-            self.skipTest(reason="This test requires a Nvidia or AMD GPU")
+        elif device_type not in ["cuda", "rocm", "xpu"]:
+            self.skipTest(reason="This test requires a Nvidia or AMD GPU, or an Intel XPU")
 
         torch.compiler.reset()