Fix qwen2_5_vl tests (#38845)

* fix * breakpoint() * breakpoint() * update * update * update * update * update * update --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-07-04 05:10:06 +06:00 · 2025-06-17 10:55:24 +02:00 · 2025-06-17 10:55:24 +02:00 · c61ca64aaa
commit c61ca64aaa
parent 37367c7d9f
1 changed files with 20 additions and 16 deletions
--- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py
+++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py
@ -14,7 +14,6 @@
 """Testing suite for the PyTorch Qwen2.5-VL model."""
 import copy
 import gc
 import tempfile
 import unittest
@ -29,7 +28,7 @@ from transformers import (
    is_vision_available,
 )
 from transformers.testing_utils import (
-    backend_empty_cache,
+    cleanup,
    is_flaky,
    require_cv2,
    require_flash_attn,
@ -408,9 +407,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
        url = "https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/demo_small.jpg"
        self.image = Image.open(requests.get(url, stream=True).raw)
        cleanup(torch_device, gc_collect=True)
    def tearDown(self):
-        gc.collect()
+        cleanup(torch_device, gc_collect=True)
        backend_empty_cache(torch_device)
    @slow
    def test_small_model_integration_test(self):
@ -422,7 +422,7 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
        inputs = self.processor(text=[text], images=[self.image], return_tensors="pt")
        expected_input_ids = [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 151652, 151655, 151655]  # fmt: skip
-        assert torch.allclose(expected_input_ids, inputs.input_ids[0].tolist()[:17], atol=3e-3)
+        torch.testing.assert_close(expected_input_ids, inputs.input_ids[0].tolist()[:17])
        expected_pixel_slice = torch.tensor(
            [
@ -436,13 +436,13 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
            dtype=torch.float32,
            device="cpu",
        )
-        assert torch.allclose(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=3e-3)
+        torch.testing.assert_close(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=5e-4, rtol=1e-5)
        # verify generation
        inputs = inputs.to(torch_device)
        output = model.generate(**inputs, max_new_tokens=30)
-        EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets"
+        EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in"
        self.assertEqual(
            self.processor.decode(output[0], skip_special_tokens=True),
@ -463,9 +463,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
        output = model.generate(**inputs, max_new_tokens=30)
        EXPECTED_DECODED_TEXT = [
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in',
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets'
+            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in',
        ]  # fmt: skip
        self.assertEqual(
            self.processor.batch_decode(output, skip_special_tokens=True),
            EXPECTED_DECODED_TEXT,
@ -482,10 +483,11 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
        output = model.generate(**inputs, max_new_tokens=30, num_return_sequences=3)
        EXPECTED_DECODED_TEXT = [
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in',
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in',
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices',
+            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in',
        ]  # fmt: skip
        self.assertEqual(
            self.processor.batch_decode(output, skip_special_tokens=True),
            EXPECTED_DECODED_TEXT,
@ -510,9 +512,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
        output = model.generate(**inputs, max_new_tokens=30)
        EXPECTED_DECODED_TEXT = [
-            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets',
+            'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in',
-            'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with various tasks and answer questions to the best of my'
+            'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\n addCriterion',
        ]  # fmt: skip
        self.assertEqual(
            self.processor.batch_decode(output, skip_special_tokens=True),
            EXPECTED_DECODED_TEXT,
@ -537,9 +540,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
        output = model.generate(**inputs, max_new_tokens=30)
        EXPECTED_DECODED_TEXT = [
-            "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets",
+            "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in",
-            "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets",
+            "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\n addCriterion\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is",
        ]
        self.assertEqual(
            self.processor.batch_decode(output, skip_special_tokens=True),
            EXPECTED_DECODED_TEXT,