Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-04 13:20:12 +06:00
enable several cases on XPU (#37516)
* enable several cases on XPU

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Update tests/test_modeling_common.py

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
parent 5ab7a7c640
commit 33f6c5a5c8
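The hunks below swap CUDA-specific test gates (require_torch_gpu, require_torch_multi_gpu) for device-agnostic ones (require_torch_accelerator, require_torch_multi_accelerator) so the same tests also run on Intel XPU. The following minimal sketch is not part of the diff: the checkpoint name is a placeholder, and only the decorators and torch_device are the testing_utils helpers actually used in the hunks.

import unittest

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.testing_utils import require_torch_accelerator, slow, torch_device


@slow
@require_torch_accelerator  # was @require_torch_gpu: now also satisfied by an Intel XPU
class DeviceAgnosticSmokeTest(unittest.TestCase):
    def test_generate(self):
        model_id = "hf-internal-testing/tiny-random-gpt2"  # placeholder tiny checkpoint
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)

        # torch_device resolves to "cuda", "xpu", ... so no CUDA-specific code is needed
        inputs = tokenizer("Hello", return_tensors="pt").to(torch_device)
        output = model.generate(**inputs, max_new_tokens=5)
        self.assertEqual(output.shape[0], 1)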
@@ -22,6 +22,7 @@ from transformers.testing_utils import (
     require_bitsandbytes,
     require_torch,
     require_torch_accelerator,
+    require_torch_multi_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -517,7 +518,7 @@ class FalconMambaIntegrationTests(unittest.TestCase):

         self.assertListEqual(out, EXPECTED_OUTPUT)

-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_training_kernel(self):
         model_id = "tiiuae/falcon-mamba-7b"

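In the FalconMamba hunks above, test_training_kernel only needs more than one accelerator rather than multiple CUDA GPUs, so its gate becomes require_torch_multi_accelerator. The sketch below shows what such a gate conceptually checks; it is an illustration with made-up helper names, not the actual testing_utils implementation.

import unittest

import torch


def _accelerator_count() -> int:
    # Count CUDA/ROCm devices first, then fall back to Intel XPU devices.
    if torch.cuda.is_available():
        return torch.cuda.device_count()
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.xpu.device_count()
    return 0


def require_multi_accelerator_sketch(test_case):
    """Skip the decorated test unless at least two accelerators are visible."""
    return unittest.skipUnless(_accelerator_count() > 1, "test requires multiple accelerators")(test_case)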
@@ -35,7 +35,6 @@ from transformers.testing_utils import (
     require_peft,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     slow,
     torch_device,
 )
@@ -422,7 +421,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin):
         self.assertNotIn("adapter_1", model.peft_config)
         self.assertIn("adapter_2", model.peft_config)

-    @require_torch_gpu
+    @require_torch_accelerator
     @require_bitsandbytes
     def test_peft_from_pretrained_kwargs(self):
         """
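In the PEFT hunks above, the now-unused require_torch_gpu import is dropped and test_peft_from_pretrained_kwargs is gated on any accelerator plus bitsandbytes. Below is a hedged sketch of that stacked gating around a quantized PEFT load; the adapter id and the kwargs are placeholders for illustration, not the test's actual body.

import unittest

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from transformers.testing_utils import require_bitsandbytes, require_torch_accelerator


@require_torch_accelerator  # any supported accelerator is enough
@require_bitsandbytes       # the quantization backend must also be installed
class PeftKwargsSketch(unittest.TestCase):
    def test_load_adapter_with_quantization_kwargs(self):
        # "peft-internal-testing/tiny-OPT-lora" is a hypothetical adapter id used only
        # to illustrate passing quantization kwargs through from_pretrained.
        model = AutoModelForCausalLM.from_pretrained(
            "peft-internal-testing/tiny-OPT-lora",
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            device_map="auto",
        )
        self.assertTrue(hasattr(model, "peft_config"))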
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
     require_intel_extension_for_pytorch,
     require_torch_accelerator,
     require_torch_gpu,
+    require_torch_multi_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -202,6 +203,7 @@ class AwqTest(unittest.TestCase):
         output = quantized_model.generate(**input_ids, max_new_tokens=40)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_BF16)

+    @require_torch_gpu
     def test_quantized_model_exllama(self):
         """
         Simple test that checks if the quantized model is working properly with exllama backend
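Note the asymmetry in the AWQ hunks: most gates become device-agnostic, but test_quantized_model_exllama gains an explicit @require_torch_gpu, keeping it CUDA-only (the exllama kernels are CUDA kernels). A small illustrative sketch of mixing a device-agnostic class gate with a CUDA-only method gate follows; the class and test names are made up.

import unittest

from transformers.testing_utils import require_torch_accelerator, require_torch_gpu


@require_torch_accelerator      # class-level gate: run the suite on any accelerator
class QuantBackendSketch(unittest.TestCase):
    def test_portable_backend(self):
        pass                    # runs on CUDA, ROCm, or XPU

    @require_torch_gpu          # method-level gate: this backend ships CUDA-only kernels
    def test_cuda_only_backend(self):
        pass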
@@ -240,7 +242,7 @@ class AwqTest(unittest.TestCase):
         output = model.generate(**input_ids, max_new_tokens=40)
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)

-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
@@ -275,7 +277,7 @@ class AwqTest(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 @require_auto_awq
 @require_accelerate
 class AwqFusedTest(unittest.TestCase):
@@ -3825,7 +3825,7 @@ class ModelTesterMixin:
         )

     @require_torch_sdpa
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_sdpa_can_dispatch_on_flash(self):
         if not self.has_attentions:
@@ -3836,8 +3836,8 @@ class ModelTesterMixin:
             self.skipTest(reason="This test requires an NVIDIA GPU with compute capability >= 8.0")
         elif device_type == "rocm" and major < 9:
             self.skipTest(reason="This test requires an AMD GPU with compute capability >= 9.0")
-        else:
-            self.skipTest(reason="This test requires a Nvidia or AMD GPU")
+        elif device_type not in ["cuda", "rocm", "xpu"]:
+            self.skipTest(reason="This test requires a Nvidia or AMD GPU, or an Intel XPU")

         torch.compiler.reset()

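The final hunk widens the skip ladder in test_sdpa_can_dispatch_on_flash so an Intel XPU no longer falls into the catch-all skip. Below is a hedged reconstruction of that ladder as a standalone helper; the condition guarding the first CUDA skip is inferred from its message, since the hunk shows only the message itself, and how the test derives device_type and major is not visible in this diff.

from typing import Optional


def flash_sdpa_skip_reason(device_type: str, major: int) -> Optional[str]:
    """Return why the flash-SDPA dispatch test would be skipped, or None to run it."""
    if device_type == "cuda" and major < 8:
        return "This test requires an NVIDIA GPU with compute capability >= 8.0"
    if device_type == "rocm" and major < 9:
        return "This test requires an AMD GPU with compute capability >= 9.0"
    if device_type not in ["cuda", "rocm", "xpu"]:
        return "This test requires a Nvidia or AMD GPU, or an Intel XPU"
    return None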