fix a bunch of XPU UT failures on stock PyTorch 2.7 and 2.8 (#39069)

* fix a bunch of XPU UT failures on stock PyTorch 2.7 and 2.8

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* qwen3

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* quanto

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* models

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* idefics2

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Author: Yao Matrix <matrix.yao@intel.com>, 2025-06-27 20:01:53 +08:00, committed by GitHub
Commit: 0106a50a6b (parent: cb17103bd5)
9 changed files with 53 additions and 31 deletions


@@ -30,6 +30,7 @@ from transformers import (
 )
 from transformers.models.idefics3 import Idefics3VisionConfig
 from transformers.testing_utils import (
+    Expectations,
     backend_empty_cache,
     require_bitsandbytes,
     require_torch,
@@ -483,23 +484,26 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
             device=model.device, dtype=model.dtype
         )
-        EXPECTED_OUTPUT = {
-            "cpu": [
-                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
-                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
-            ],  # cpu output
-            "cuda": [
-                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
-                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
-            ],  # cuda output
-            "xpu": [
-                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
-                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
-            ],  # xpu output
-        }
+        EXPECTED_OUTPUTS = Expectations(
+            {
+                ("cpu", None): [
+                    "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                    "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
+                ],
+                ("cuda", None): [
+                    "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                    "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
+                ],
+                ("xpu", 3): [
+                    "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                    "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
+                ],
+            }
+        )  # fmt: skip
+        EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()
 
         generate_ids = model.generate(**inputs, max_new_tokens=20)
         outputs = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        self.assertListEqual(outputs, EXPECTED_OUTPUT[model.device.type])
+        self.assertListEqual(outputs, EXPECTED_OUTPUT)
 
     def test_tokenizer_integration(self):
         model_id = "rhymes-ai/Aria"
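Almost every hunk in this commit follows the same pattern: expected outputs keyed by `(device_type, major_version)` tuples and resolved at runtime with `Expectations.get_expectation()` from `transformers.testing_utils`, where a `None` version acts as a wildcard (ROCm entries below also use full `(major, minor)` tuples such as `(9, 5)`). A minimal sketch of that lookup idea, with a simplified device probe — an illustration only, not the actual `Expectations` implementation:

```python
# Minimal sketch of the (device_type, major_version) lookup idea behind
# transformers.testing_utils.Expectations. Illustrative only: the real class
# also handles partial-match scoring, ROCm (major, minor) tuples, etc.
import torch


def current_device():
    # Simplified probe; the real helper derives versions differently.
    if torch.cuda.is_available():
        return "cuda", torch.cuda.get_device_capability()[0]
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu", None  # the real helper also resolves an XPU generation
    return "cpu", None


class ExpectationsSketch:
    def __init__(self, data):
        self.data = data

    def get_expectation(self):
        device, major = current_device()
        if (device, major) in self.data:  # exact match wins
            return self.data[(device, major)]
        if (device, None) in self.data:  # (device, None) is a wildcard
            return self.data[(device, None)]
        raise KeyError(f"no expectation recorded for {(device, major)!r}")
```

Keeping a `("cuda", None)` wildcard next to pinned entries like `("cuda", 8)` is what lets `("xpu", 3)` entries be added throughout this commit without disturbing existing CUDA or ROCm expectations.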


@@ -422,7 +422,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
         expected_outputs = Expectations(
             {
-                ("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit sky,\nNature's quiet song.",
+                ("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
                 # 4-bit
                 ("cuda", 7): "Sure, here's a haiku for you:\n\nMorning dew sparkles,\nPetals unfold in sunlight,\n",
                 ("cuda", 8): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
@@ -434,6 +434,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
     @slow
     @require_torch_accelerator
+    @require_deterministic_for_xpu
     def test_small_model_integration_generate_chat_template(self):
         processor = AutoProcessor.from_pretrained(self.model_checkpoint)
         model = self.get_model()
@@ -458,7 +459,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
         expected_outputs = Expectations(
             {
-                ("xpu", 3): "The image depicts a cozy scene of two cats resting on a bright pink blanket. The cats,",
+                ("xpu", 3): 'The image depicts a cozy scene of two cats resting on a bright pink blanket. The cats,',
                 # 4-bit
                 ("cuda", 7): 'The image depicts two cats comfortably resting on a pink blanket spread across a sofa. The cats,',
                 ("cuda", 8): 'The image depicts a cozy scene of two cats resting on a bright pink blanket. The cats,',


@@ -823,6 +823,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
                 ("rocm", None): 'Today is a nice day and we can do this again."\n\nDana said that she will',
                 ("rocm", (9, 5)): "Today is a nice day and if you don't know anything about the state of play during your holiday",
                 ("cuda", None): "Today is a nice day and if you don't know anything about the state of play during your holiday",
+                ("xpu", 3): "Today is a nice day and if you don't know anything about the state of play during your holiday",
             }
         )  # fmt: skip
         EXPECTED_OUTPUT = expected_outputs.get_expectation()


@@ -624,6 +624,7 @@ class Idefics2ForConditionalGenerationIntegrationTest(unittest.TestCase):
         expected_generated_texts = Expectations(
             {
+                ("xpu", 3): "In this image, we see the Statue of Liberty, the Hudson River,",
                 ("cuda", None): "In this image, we see the Statue of Liberty, the Hudson River,",
                 ("rocm", (9, 5)): "In this image, we see the Statue of Liberty, the New York City",
             }


@@ -389,16 +389,15 @@ class LlavaOnevisionForConditionalGenerationIntegrationTest(unittest.TestCase):
         EXPECTED_DECODED_TEXTS = Expectations(
             {
+                ("xpu", 3): 'user\n\nWhat do you see in this image?\nassistant\nThe image is a radar chart that compares the performance of different models in a specific task, likely related to natural language processing or machine learning. The chart is divided into several axes, each representing a different model or method. The models are color-coded and labeled with their respective names. The axes are labeled with terms such as "VQA," "GQA," "MQA," "VQAv2," "MM-Vet," "LLaVA-Bench," "LLaVA-1',
                 ("cuda", 7): 'user\n\nWhat do you see in this image?\nassistant\nThe image is a radar chart that compares the performance of different models in a specific task, likely related to natural language processing or machine learning. The chart is divided into several axes, each representing a different model or method. The models are color-coded and labeled with their respective names. The axes are labeled with terms such as "VQA," "GQA," "MQA," "VQAv2," "MM-Vet," "LLaVA-Bench," "LLaVA-1',
                 ("cuda", 8): 'user\n\nWhat do you see in this image?\nassistant\nThe image is a radar chart that compares the performance of different models in a specific task, likely related to natural language processing or machine learning. The chart is divided into several axes, each representing a different model or method. The models are color-coded and labeled with their respective names. The axes are labeled with terms such as "VQA," "GQA," "MQA," "VIZ," "TextVQA," "SQA-IMG," and "MQE." The radar chart shows',
             }
         )  # fmt: skip
         EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
+        DECODED_TEXT = self.processor.decode(output[0], skip_special_tokens=True)
 
-        self.assertEqual(
-            self.processor.decode(output[0], skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
-        )
+        self.assertEqual(DECODED_TEXT, EXPECTED_DECODED_TEXT)
 
     @slow
     @require_bitsandbytes


@@ -194,6 +194,7 @@ class MixtralIntegrationTest(unittest.TestCase):
         # fmt: off
         EXPECTED_LOGITS_LEFT_UNPADDED = Expectations(
             {
+                ("xpu", 3): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7070, 0.2461]]).to(torch_device),
                 ("cuda", 7): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2275, 0.6054], [0.2656, -0.7070, 0.2460]]).to(torch_device),
                 ("cuda", 8): torch.Tensor([[0.2207, 0.5234, -0.3828], [0.8203, -0.2285, 0.6055], [0.2656, -0.7109, 0.2451]]).to(torch_device),
                 ("rocm", 9): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to(torch_device),
@@ -203,6 +204,7 @@ class MixtralIntegrationTest(unittest.TestCase):
         EXPECTED_LOGITS_RIGHT_UNPADDED = Expectations(
             {
+                ("xpu", 3): torch.Tensor([[0.2178, 0.1270, -0.1641], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(torch_device),
                 ("cuda", 7): torch.Tensor([[0.2167, 0.1269, -0.1640], [-0.3496, 0.2988, -1.0312], [0.0688, 0.7929, 0.8007]]).to(torch_device),
                 ("cuda", 8): torch.Tensor([[0.2178, 0.1270, -0.1621], [-0.3496, 0.3008, -1.0312], [0.0693, 0.7930, 0.7969]]).to(torch_device),
                 ("rocm", 9): torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to(torch_device),


@@ -28,6 +28,7 @@ from transformers import (
     is_vision_available,
 )
 from transformers.testing_utils import (
+    Expectations,
     backend_empty_cache,
     require_flash_attn,
     require_torch,
@@ -482,15 +483,23 @@ class Qwen2VLIntegrationTest(unittest.TestCase):
         # it should not matter whether two images are the same size or not
         output = model.generate(**inputs, max_new_tokens=30)
+        DECODED_TEXT = self.processor.batch_decode(output, skip_special_tokens=True)
 
-        EXPECTED_DECODED_TEXT = [
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets'
-        ]  # fmt: skip
-        self.assertEqual(
-            self.processor.batch_decode(output, skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
-        )
+        EXPECTED_DECODED_TEXTS = Expectations(
+            {
+                ("xpu", 3): [
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+                ],
+                ("cuda", None): [
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+                    'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets',
+                ],
+            }
+        )  # fmt: skip
+        EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
+        self.assertEqual(DECODED_TEXT, EXPECTED_DECODED_TEXT)
 
     @slow
     @require_flash_attn


@@ -207,6 +207,7 @@ class Qwen3IntegrationTest(unittest.TestCase):
     def test_speculative_generation(self):
         EXPECTED_TEXT_COMPLETIONS = Expectations(
             {
+                ("xpu", 3): "My favourite condiment is 100% peanut butter. I love it so much that I can't help but use it",
                 ("cuda", 7): "My favourite condiment is 100% natural. It's a little spicy and a little sweet, but it's the",
                 ("cuda", 8): "My favourite condiment is 100% peanut butter. I love it so much that I can't help but use it",
             }


@@ -223,7 +223,9 @@ class QuantoQuantizationTest(unittest.TestCase):
         with tempfile.TemporaryDirectory() as tmpdirname:
             with self.assertRaises(ValueError) as e:
                 self.quantized_model.save_pretrained(tmpdirname, safe_serialization=False)
-            self.assertIn("The model is quantized with quanto and is not serializable", str(e.exception))
+            self.assertIn(
+                "The model is quantized with QuantizationMethod.QUANTO and is not serializable", str(e.exception)
+            )
         # TODO: replace by the following when it works
         # quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
         #     tmpdirname, torch_dtype=torch.float32, device_map="cpu"
@@ -237,7 +239,9 @@ class QuantoQuantizationTest(unittest.TestCase):
         with tempfile.TemporaryDirectory() as tmpdirname:
             with self.assertRaises(ValueError) as e:
                 self.quantized_model.save_pretrained(tmpdirname)
-            self.assertIn("The model is quantized with quanto and is not serializable", str(e.exception))
+            self.assertIn(
+                "The model is quantized with QuantizationMethod.QUANTO and is not serializable", str(e.exception)
+            )
         # quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
         #     tmpdirname, torch_dtype=torch.float32, device_map="cpu"
         # )
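The expected substring changes from `quantized with quanto` to `quantized with QuantizationMethod.QUANTO`, which is consistent with the error message interpolating the `QuantizationMethod` enum member itself rather than its string value. A standalone demo of the underlying Python behavior, using a stand-in enum (not the transformers source):

```python
# Stand-in for transformers' QuantizationMethod enum, to show why the
# interpolated form can be "QuantizationMethod.QUANTO" instead of "quanto".
from enum import Enum


class QuantizationMethod(str, Enum):
    QUANTO = "quanto"


method = QuantizationMethod.QUANTO
# On Python >= 3.11, f-strings/format() follow Enum.__str__ and print the
# qualified member name; on <= 3.10 they used str.__format__ and printed
# the bare value "quanto".
print(f"The model is quantized with {method} and is not serializable")
print(method.value)  # always the bare value: "quanto"
```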