diff --git a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py index 99c26096113..7894dc69806 100644 --- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py +++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py @@ -14,7 +14,6 @@ """Testing suite for the PyTorch Qwen2.5-VL model.""" import copy -import gc import tempfile import unittest @@ -29,7 +28,7 @@ from transformers import ( is_vision_available, ) from transformers.testing_utils import ( - backend_empty_cache, + cleanup, is_flaky, require_cv2, require_flash_attn, @@ -408,9 +407,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): url = "https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/demo_small.jpg" self.image = Image.open(requests.get(url, stream=True).raw) + cleanup(torch_device, gc_collect=True) + def tearDown(self): - gc.collect() - backend_empty_cache(torch_device) + cleanup(torch_device, gc_collect=True) @slow def test_small_model_integration_test(self): @@ -422,7 +422,7 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): inputs = self.processor(text=[text], images=[self.image], return_tensors="pt") expected_input_ids = [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 151652, 151655, 151655] # fmt: skip - assert torch.allclose(expected_input_ids, inputs.input_ids[0].tolist()[:17], atol=3e-3) + torch.testing.assert_close(expected_input_ids, inputs.input_ids[0].tolist()[:17]) expected_pixel_slice = torch.tensor( [ @@ -436,13 +436,13 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): dtype=torch.float32, device="cpu", ) - assert torch.allclose(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=3e-3) + torch.testing.assert_close(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=5e-4, rtol=1e-5) # verify generation inputs = inputs.to(torch_device) output = model.generate(**inputs, max_new_tokens=30) - EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets" + EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in" self.assertEqual( self.processor.decode(output[0], skip_special_tokens=True), @@ -463,9 +463,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets' + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', ] # fmt: skip + self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT, @@ -482,10 +483,11 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): output = model.generate(**inputs, max_new_tokens=30, num_return_sequences=3) EXPECTED_DECODED_TEXT = [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices', - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', ] # fmt: skip + self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT, @@ -510,9 +512,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets', - 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with various tasks and answer questions to the best of my' + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in', + 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\n addCriterion', ] # fmt: skip + self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT, @@ -537,9 +540,10 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and energetic nature, which is evident in", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\n addCriterion\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and gentle nature, which is", ] + self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT,