diff --git a/tests/models/aria/test_modeling_aria.py b/tests/models/aria/test_modeling_aria.py
index 1a2c72a72bf..747963aa50e 100644
--- a/tests/models/aria/test_modeling_aria.py
+++ b/tests/models/aria/test_modeling_aria.py
@@ -30,6 +30,7 @@ from transformers import (
 )
 from transformers.models.idefics3 import Idefics3VisionConfig
 from transformers.testing_utils import (
+    Expectations,
     backend_empty_cache,
     require_bitsandbytes,
     require_torch,
@@ -483,23 +484,26 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
             device=model.device, dtype=model.dtype
         )
 
-        EXPECTED_OUTPUT = {
-            "cpu": [
-                "<|im_start|>user\n \n \n USER: What's the difference of two images?\n ASSISTANT: \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
-                "<|im_start|>user\n \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
-            ],  # cpu output
-            "cuda": [
-                "<|im_start|>user\n \n \n USER: What's the difference of two images?\n ASSISTANT: \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
-                "<|im_start|>user\n \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
-            ],  # cuda output
-            "xpu": [
-                "<|im_start|>user\n \n \n USER: What's the difference of two images?\n ASSISTANT: \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
-                "<|im_start|>user\n \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
-            ],  # xpu output
-        }
+        EXPECTED_OUTPUTS = Expectations(
+            {
+                ("cpu", None): [
+                    "<|im_start|>user\n \n \n USER: What's the difference of two images?\n ASSISTANT: \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                    "<|im_start|>user\n \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
+                ],
+                ("cuda", None): [
+                    "<|im_start|>user\n \n \n USER: What's the difference of two images?\n ASSISTANT: \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                    "<|im_start|>user\n \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
+                ],
+                ("xpu", 3): [
+                    "<|im_start|>user\n \n \n USER: What's the difference of two images?\n ASSISTANT: \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                    "<|im_start|>user\n \n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
+                ],
+            }
+        )  # fmt: skip
+        EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()
         generate_ids = model.generate(**inputs, max_new_tokens=20)
         outputs = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        self.assertListEqual(outputs, EXPECTED_OUTPUT[model.device.type])
+        self.assertListEqual(outputs, EXPECTED_OUTPUT)
 
     def test_tokenizer_integration(self):
         model_id = "rhymes-ai/Aria"
diff --git a/tests/models/aya_vision/test_modeling_aya_vision.py b/tests/models/aya_vision/test_modeling_aya_vision.py
index eaa5aebe846..5cde1f216ec 100644
--- a/tests/models/aya_vision/test_modeling_aya_vision.py
+++ b/tests/models/aya_vision/test_modeling_aya_vision.py
@@ -422,7 +422,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
 
         expected_outputs = Expectations(
             {
-                ("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit sky,\nNature's quiet song.",
+                ("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
                 # 4-bit
                 ("cuda", 7): "Sure, here's a haiku for you:\n\nMorning dew sparkles,\nPetals unfold in sunlight,\n",
                 ("cuda", 8): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
@@ -434,6 +434,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
 
     @slow
     @require_torch_accelerator
+    @require_deterministic_for_xpu
     def test_small_model_integration_generate_chat_template(self):
         processor = AutoProcessor.from_pretrained(self.model_checkpoint)
         model = self.get_model()
@@ -458,7 +459,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
 
         expected_outputs = Expectations(
             {
-                ("xpu", 3): "The image depicts a cozy scene of two cats resting on a bright pink blanket. The cats,",
+                ("xpu", 3): 'The image depicts a cozy scene of two cats resting on a bright pink blanket. The cats,',
                 # 4-bit
                 ("cuda", 7): 'The image depicts two cats comfortably resting on a pink blanket spread across a sofa. The cats,',
                 ("cuda", 8): 'The image depicts a cozy scene of two cats resting on a bright pink blanket. The cats,',
diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py
index 64ebd236a23..d0796468c39 100644
--- a/tests/models/gpt2/test_modeling_gpt2.py
+++ b/tests/models/gpt2/test_modeling_gpt2.py
@@ -823,6 +823,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
                 ("rocm", None): 'Today is a nice day and we can do this again."\n\nDana said that she will',
                 ("rocm", (9, 5)): "Today is a nice day and if you don't know anything about the state of play during your holiday",
                 ("cuda", None): "Today is a nice day and if you don't know anything about the state of play during your holiday",
+                ("xpu", 3): "Today is a nice day and if you don't know anything about the state of play during your holiday",
             }
         )  # fmt: skip
         EXPECTED_OUTPUT = expected_outputs.get_expectation()
diff --git a/tests/models/idefics2/test_modeling_idefics2.py b/tests/models/idefics2/test_modeling_idefics2.py
index f8f2ac414d1..6ce19ddfade 100644
--- a/tests/models/idefics2/test_modeling_idefics2.py
+++ b/tests/models/idefics2/test_modeling_idefics2.py
@@ -624,6 +624,7 @@ class Idefics2ForConditionalGenerationIntegrationTest(unittest.TestCase):
 
         expected_generated_texts = Expectations(
             {
+                ("xpu", 3): "In this image, we see the Statue of Liberty, the Hudson River,",
                 ("cuda", None): "In this image, we see the Statue of Liberty, the Hudson River,",
                 ("rocm", (9, 5)): "In this image, we see the Statue of Liberty, the New York City",
             }
diff --git a/tests/models/llava_onevision/test_modeling_llava_onevision.py b/tests/models/llava_onevision/test_modeling_llava_onevision.py
index 9915d47e0e2..f482f0a0680 100644
--- a/tests/models/llava_onevision/test_modeling_llava_onevision.py
+++ b/tests/models/llava_onevision/test_modeling_llava_onevision.py
@@ -389,16 +389,15 @@ class LlavaOnevisionForConditionalGenerationIntegrationTest(unittest.TestCase):
 
         EXPECTED_DECODED_TEXTS = Expectations(
             {
+                ("xpu", 3): 'user\n\nWhat do you see in this image?\nassistant\nThe image is a radar chart that compares the performance of different models in a specific task, likely related to natural language processing or machine learning. The chart is divided into several axes, each representing a different model or method. The models are color-coded and labeled with their respective names. The axes are labeled with terms such as "VQA," "GQA," "MQA," "VQAv2," "MM-Vet," "LLaVA-Bench," "LLaVA-1',
                 ("cuda", 7): 'user\n\nWhat do you see in this image?\nassistant\nThe image is a radar chart that compares the performance of different models in a specific task, likely related to natural language processing or machine learning. The chart is divided into several axes, each representing a different model or method. The models are color-coded and labeled with their respective names. The axes are labeled with terms such as "VQA," "GQA," "MQA," "VQAv2," "MM-Vet," "LLaVA-Bench," "LLaVA-1',
                 ("cuda", 8): 'user\n\nWhat do you see in this image?\nassistant\nThe image is a radar chart that compares the performance of different models in a specific task, likely related to natural language processing or machine learning. The chart is divided into several axes, each representing a different model or method. The models are color-coded and labeled with their respective names. The axes are labeled with terms such as "VQA," "GQA," "MQA," "VIZ," "TextVQA," "SQA-IMG," and "MQE." The radar chart shows',
             }
         )  # fmt: skip
         EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
+        DECODED_TEXT = self.processor.decode(output[0], skip_special_tokens=True)
 
-        self.assertEqual(
-            self.processor.decode(output[0], skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
-        )
+        self.assertEqual(DECODED_TEXT, EXPECTED_DECODED_TEXT)
 
     @slow
     @require_bitsandbytes
diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py
index 3b53e1cfa53..94ceb0e4a70 100644
--- a/tests/models/mixtral/test_modeling_mixtral.py
+++ b/tests/models/mixtral/test_modeling_mixtral.py
@@ -194,6 +194,7 @@ class MixtralIntegrationTest(unittest.TestCase):
         # fmt: off
         EXPECTED_LOGITS_LEFT_UNPADDED = Expectations(
             {
+                ("xpu", 3): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7070, 0.2461]]).to(torch_device),
                 ("cuda", 7): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2275, 0.6054], [0.2656, -0.7070, 0.2460]]).to(torch_device),
                 ("cuda", 8): torch.Tensor([[0.2207, 0.5234, -0.3828], [0.8203, -0.2285, 0.6055], [0.2656, -0.7109, 0.2451]]).to(torch_device),
                 ("rocm", 9): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to(torch_device),
@@ -203,6 +204,7 @@ class MixtralIntegrationTest(unittest.TestCase):
 
         EXPECTED_LOGITS_RIGHT_UNPADDED = Expectations(
             {
+                ("xpu", 3): torch.Tensor([[0.2178, 0.1270, -0.1641], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(torch_device),
                 ("cuda", 7): torch.Tensor([[0.2167, 0.1269, -0.1640], [-0.3496, 0.2988, -1.0312], [0.0688, 0.7929, 0.8007]]).to(torch_device),
                 ("cuda", 8): torch.Tensor([[0.2178, 0.1270, -0.1621], [-0.3496, 0.3008, -1.0312], [0.0693, 0.7930, 0.7969]]).to(torch_device),
                 ("rocm", 9): torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to(torch_device),
diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py
index 5299b6a2c11..72669fd390f 100644
--- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py
+++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py
@@ -28,6 +28,7 @@ from transformers import (
     is_vision_available,
 )
 from transformers.testing_utils import (
+    Expectations,
     backend_empty_cache,
     require_flash_attn,
     require_torch,
@@ -482,15 +483,23 @@ class Qwen2VLIntegrationTest(unittest.TestCase):
 
         # it should not matter whether two images are the same size or not
         output = model.generate(**inputs, max_new_tokens=30)
+        DECODED_TEXT = self.processor.batch_decode(output, skip_special_tokens=True)
 
-        EXPECTED_DECODED_TEXT = [
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets'
-        ]  # fmt: skip
-        self.assertEqual(
-            self.processor.batch_decode(output, skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
-        )
+        EXPECTED_DECODED_TEXTS = Expectations(
+            {
+                ("xpu", 3): [
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+                ],
+                ("cuda", None): [
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets',
+                ],
+            }
+        )  # fmt: skip
+        EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
+
+        self.assertEqual(DECODED_TEXT, EXPECTED_DECODED_TEXT)
 
     @slow
     @require_flash_attn
diff --git a/tests/models/qwen3/test_modeling_qwen3.py b/tests/models/qwen3/test_modeling_qwen3.py
index 3f3f5bae083..5f961ac79e0 100644
--- a/tests/models/qwen3/test_modeling_qwen3.py
+++ b/tests/models/qwen3/test_modeling_qwen3.py
@@ -207,6 +207,7 @@ class Qwen3IntegrationTest(unittest.TestCase):
     def test_speculative_generation(self):
         EXPECTED_TEXT_COMPLETIONS = Expectations(
             {
+                ("xpu", 3): "My favourite condiment is 100% peanut butter. I love it so much that I can't help but use it",
                 ("cuda", 7): "My favourite condiment is 100% natural. It's a little spicy and a little sweet, but it's the",
                 ("cuda", 8): "My favourite condiment is 100% peanut butter. I love it so much that I can't help but use it",
             }
diff --git a/tests/quantization/quanto_integration/test_quanto.py b/tests/quantization/quanto_integration/test_quanto.py
index 766faafbbfa..a4e0b478697 100644
--- a/tests/quantization/quanto_integration/test_quanto.py
+++ b/tests/quantization/quanto_integration/test_quanto.py
@@ -223,7 +223,9 @@ class QuantoQuantizationTest(unittest.TestCase):
         with tempfile.TemporaryDirectory() as tmpdirname:
             with self.assertRaises(ValueError) as e:
                 self.quantized_model.save_pretrained(tmpdirname, safe_serialization=False)
-            self.assertIn("The model is quantized with quanto and is not serializable", str(e.exception))
+            self.assertIn(
+                "The model is quantized with QuantizationMethod.QUANTO and is not serializable", str(e.exception)
+            )
             # TODO: replace by the following when it works
             # quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
             #     tmpdirname, torch_dtype=torch.float32, device_map="cpu"
             # )
@@ -237,7 +239,9 @@ class QuantoQuantizationTest(unittest.TestCase):
         with tempfile.TemporaryDirectory() as tmpdirname:
             with self.assertRaises(ValueError) as e:
                 self.quantized_model.save_pretrained(tmpdirname)
-            self.assertIn("The model is quantized with quanto and is not serializable", str(e.exception))
+            self.assertIn(
+                "The model is quantized with QuantizationMethod.QUANTO and is not serializable", str(e.exception)
+            )
             # quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
             #     tmpdirname, torch_dtype=torch.float32, device_map="cpu"
             # )