diff --git a/tests/models/beit/test_image_processing_beit.py b/tests/models/beit/test_image_processing_beit.py index dca26e04929..b0c8ce4a4f4 100644 --- a/tests/models/beit/test_image_processing_beit.py +++ b/tests/models/beit/test_image_processing_beit.py @@ -16,13 +16,12 @@ import unittest -import numpy as np from datasets import load_dataset from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -81,6 +80,20 @@ class BeitImageProcessingTester(unittest.TestCase): "do_reduce_labels": self.do_reduce_labels, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + def prepare_semantic_single_inputs(): dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") @@ -104,7 +117,7 @@ def prepare_semantic_batch_inputs(): @require_torch @require_vision -class BeitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class BeitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = BeitImageProcessor if is_vision_available() else None def setUp(self): @@ -137,110 +150,11 @@ class BeitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) self.assertEqual(image_processor.do_reduce_labels, True) - def 
test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - 
self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - def test_call_segmentation_maps(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) maps = [] for image in image_inputs: self.assertIsInstance(image, torch.Tensor) diff --git a/tests/models/blip/test_image_processing_blip.py b/tests/models/blip/test_image_processing_blip.py index 245c722ed40..398a8c02b99 100644 --- a/tests/models/blip/test_image_processing_blip.py +++ b/tests/models/blip/test_image_processing_blip.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, 
is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import BlipImageProcessor @@ -76,40 +69,24 @@ class BlipImageProcessingTester(unittest.TestCase): "do_pad": self.do_pad, } - def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False): - """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True, - or a list of PyTorch tensors if one specifies torchify=True. - """ + def expected_output_image_shape(self, images): + return 3, self.size["height"], self.size["width"] - assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" - - if equal_resolution: - image_inputs = [] - for i in range(self.batch_size): - image_inputs.append( - np.random.randint( - 255, size=(self.num_channels, self.max_resolution, self.max_resolution), dtype=np.uint8 - ) - ) - else: - image_inputs = [] - for i in range(self.batch_size): - width, height = np.random.choice(np.arange(self.min_resolution, self.max_resolution), 2) - image_inputs.append(np.random.randint(255, size=(self.num_channels, width, height), dtype=np.uint8)) - - if not numpify and not torchify: - # PIL expects the channel dimension as last dimension - image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs] - - if torchify: - image_inputs = [torch.from_numpy(x) for x in image_inputs] - - return image_inputs + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + 
numpify=numpify, + torchify=torchify, + ) @require_torch @require_vision -class BlipImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class BlipImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = BlipImageProcessor if is_vision_available() else None def setUp(self): @@ -128,109 +105,10 @@ class BlipImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) self.assertTrue(hasattr(image_processor, "image_std")) self.assertTrue(hasattr(image_processor, "do_convert_rgb")) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], 
return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - @require_torch @require_vision -class BlipImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase): +class BlipImageProcessingTestFourChannels(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = BlipImageProcessor if is_vision_available() else None def setUp(self): @@ -250,37 +128,10 @@ class BlipImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unitte 
self.assertTrue(hasattr(image_processor, "image_std")) self.assertTrue(hasattr(image_processor, "do_convert_rgb")) - def test_batch_feature(self): - pass + @unittest.skip("BlipImageProcessor does not support 4 channels yet") # FIXME Amy + def test_call_numpy(self): + return super().test_call_numpy() - def test_call_pil_four_channels(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.expected_encoded_image_num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.expected_encoded_image_num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) + @unittest.skip("BlipImageProcessor does not support 4 channels yet") # FIXME Amy + def test_call_pytorch(self): + return super().test_call_pytorch() diff --git a/tests/models/bridgetower/test_image_processing_bridgetower.py b/tests/models/bridgetower/test_image_processing_bridgetower.py index 80ea966c2a8..f8837fdc964 100644 --- a/tests/models/bridgetower/test_image_processing_bridgetower.py +++ b/tests/models/bridgetower/test_image_processing_bridgetower.py @@ -17,17 +17,12 @@ import unittest from typing import Dict, List, Optional, Union -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from
transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): from PIL import Image @@ -119,10 +114,25 @@ class BridgeTowerImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class BridgeTowerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class BridgeTowerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = BridgeTowerImageProcessor if is_vision_available() else None def setUp(self): @@ -140,99 +150,3 @@ class BridgeTowerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te self.assertTrue(hasattr(image_processing, "do_resize")) self.assertTrue(hasattr(image_processing, "size")) self.assertTrue(hasattr(image_processing, "size_divisor")) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image processor - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], 
return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image processor - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image processor - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, 
equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) diff --git a/tests/models/chinese_clip/test_image_processing_chinese_clip.py b/tests/models/chinese_clip/test_image_processing_chinese_clip.py index 90b0ffbcd6c..e8f51b29fbc 100644 --- a/tests/models/chinese_clip/test_image_processing_chinese_clip.py +++ b/tests/models/chinese_clip/test_image_processing_chinese_clip.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import ChineseCLIPImageProcessor @@ -80,40 +73,24 @@ class ChineseCLIPImageProcessingTester(unittest.TestCase): "do_convert_rgb": self.do_convert_rgb, } - def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False): - """This function prepares a list of PIL images, or a 
list of numpy arrays if one specifies numpify=True, - or a list of PyTorch tensors if one specifies torchify=True. - """ + def expected_output_image_shape(self, images): + return 3, self.crop_size["height"], self.crop_size["width"] - assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" - - if equal_resolution: - image_inputs = [] - for i in range(self.batch_size): - image_inputs.append( - np.random.randint( - 255, size=(self.num_channels, self.max_resolution, self.max_resolution), dtype=np.uint8 - ) - ) - else: - image_inputs = [] - for i in range(self.batch_size): - width, height = np.random.choice(np.arange(self.min_resolution, self.max_resolution), 2) - image_inputs.append(np.random.randint(255, size=(self.num_channels, width, height), dtype=np.uint8)) - - if not numpify and not torchify: - # PIL expects the channel dimension as last dimension - image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs] - - if torchify: - image_inputs = [torch.from_numpy(x) for x in image_inputs] - - return image_inputs + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) @require_torch @require_vision -class ChineseCLIPImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class ChineseCLIPImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ChineseCLIPImageProcessor if is_vision_available() else None def setUp(self): @@ -143,109 +120,10 @@ class ChineseCLIPImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - def 
test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - 
self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - @require_torch @require_vision -class ChineseCLIPImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase): +class ChineseCLIPImageProcessingTestFourChannels(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ChineseCLIPImageProcessor if is_vision_available() else None def setUp(self): @@ -267,37 +145,10 @@ class ChineseCLIPImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, self.assertTrue(hasattr(image_processing, "image_std")) self.assertTrue(hasattr(image_processing, "do_convert_rgb")) - def test_batch_feature(self): - pass + @unittest.skip("ChineseCLIPImageProcessor does not support 4 channels yet") # FIXME Amy + def test_call_numpy(self): + return super().test_call_numpy() - def test_call_pil_four_channels(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL 
images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.expected_encoded_image_num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.expected_encoded_image_num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) + @unittest.skip("ChineseCLIPImageProcessor does not support 4 channels yet") # FIXME Amy + def test_call_pytorch(self): + return super().test_call_pytorch() diff --git a/tests/models/clip/test_image_processing_clip.py b/tests/models/clip/test_image_processing_clip.py index 00a43a6bb43..a35a23d8da9 100644 --- a/tests/models/clip/test_image_processing_clip.py +++ b/tests/models/clip/test_image_processing_clip.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import CLIPImageProcessor @@ -80,40 +73,24 @@ class CLIPImageProcessingTester(unittest.TestCase): "do_convert_rgb": self.do_convert_rgb, } - def prepare_inputs(self, equal_resolution=False, numpify=False,
torchify=False): - """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True, - or a list of PyTorch tensors if one specifies torchify=True. - """ + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] - assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" - - if equal_resolution: - image_inputs = [] - for i in range(self.batch_size): - image_inputs.append( - np.random.randint( - 255, size=(self.num_channels, self.max_resolution, self.max_resolution), dtype=np.uint8 - ) - ) - else: - image_inputs = [] - for i in range(self.batch_size): - width, height = np.random.choice(np.arange(self.min_resolution, self.max_resolution), 2) - image_inputs.append(np.random.randint(255, size=(self.num_channels, width, height), dtype=np.uint8)) - - if not numpify and not torchify: - # PIL expects the channel dimension as last dimension - image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs] - - if torchify: - image_inputs = [torch.from_numpy(x) for x in image_inputs] - - return image_inputs + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) @require_torch @require_vision -class CLIPImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class CLIPImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = CLIPImageProcessor if is_vision_available() else None def setUp(self): @@ -142,162 +119,3 @@ class CLIPImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) image_processor = self.image_processing_class.from_dict(self.image_processor_dict, 
size=42, crop_size=84) self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - 
encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - -@require_torch -@require_vision -class CLIPImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase): - image_processing_class = CLIPImageProcessor if is_vision_available() else None - - def setUp(self): - self.image_processor_tester = CLIPImageProcessingTester(self, num_channels=4) - self.expected_encoded_image_num_channels = 3 - - @property - def image_processor_dict(self): - return self.image_processor_tester.prepare_image_processor_dict() - - def test_image_processor_properties(self): - image_processing = self.image_processing_class(**self.image_processor_dict) - self.assertTrue(hasattr(image_processing, "do_resize")) - self.assertTrue(hasattr(image_processing, "size")) - self.assertTrue(hasattr(image_processing, 
"do_center_crop")) - self.assertTrue(hasattr(image_processing, "center_crop")) - self.assertTrue(hasattr(image_processing, "do_normalize")) - self.assertTrue(hasattr(image_processing, "image_mean")) - self.assertTrue(hasattr(image_processing, "image_std")) - self.assertTrue(hasattr(image_processing, "do_convert_rgb")) - - def test_batch_feature(self): - pass - - def test_call_pil_four_channels(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.expected_encoded_image_num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.expected_encoded_image_num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/conditional_detr/test_image_processing_conditional_detr.py b/tests/models/conditional_detr/test_image_processing_conditional_detr.py index 98510a3c00f..cc3fd501a64 100644 --- a/tests/models/conditional_detr/test_image_processing_conditional_detr.py +++ b/tests/models/conditional_detr/test_image_processing_conditional_detr.py @@ -18,12 +18,10 @@ import json import pathlib import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import 
ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -111,10 +109,25 @@ class ConditionalDetrImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class ConditionalDetrImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class ConditionalDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ConditionalDetrImageProcessor if is_vision_available() else None def setUp(self): @@ -143,107 +156,6 @@ class ConditionalDetrImageProcessingTest(ImageProcessingSavingTestMixin, unittes self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.do_pad, False) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, 
self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = 
image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - @slow def test_call_pytorch_with_coco_detection_annotations(self): # prepare image and target diff --git a/tests/models/convnext/test_image_processing_convnext.py b/tests/models/convnext/test_image_processing_convnext.py index ced0765c352..0c331741807 100644 --- a/tests/models/convnext/test_image_processing_convnext.py +++ b/tests/models/convnext/test_image_processing_convnext.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import ConvNextImageProcessor @@ -73,10 +66,24 @@ class ConvNextImageProcessingTester(unittest.TestCase): "crop_pct": self.crop_pct, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["shortest_edge"], self.size["shortest_edge"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + 
return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class ConvNextImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class ConvNextImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ConvNextImageProcessor if is_vision_available() else None def setUp(self): @@ -101,102 +108,3 @@ class ConvNextImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestC image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"shortest_edge": 42}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["shortest_edge"], - self.image_processor_tester.size["shortest_edge"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["shortest_edge"], - self.image_processor_tester.size["shortest_edge"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = 
self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["shortest_edge"], - self.image_processor_tester.size["shortest_edge"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["shortest_edge"], - self.image_processor_tester.size["shortest_edge"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["shortest_edge"], - self.image_processor_tester.size["shortest_edge"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["shortest_edge"], - self.image_processor_tester.size["shortest_edge"], - ), - ) diff --git 
a/tests/models/deformable_detr/test_image_processing_deformable_detr.py b/tests/models/deformable_detr/test_image_processing_deformable_detr.py index 40bf405eebe..4fd2de49f78 100644 --- a/tests/models/deformable_detr/test_image_processing_deformable_detr.py +++ b/tests/models/deformable_detr/test_image_processing_deformable_detr.py @@ -18,12 +18,10 @@ import json import pathlib import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -111,10 +109,25 @@ class DeformableDetrImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class DeformableDetrImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class DeformableDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DeformableDetrImageProcessor if is_vision_available() else None def setUp(self): @@ -145,107 +158,6 @@ class DeformableDetrImageProcessingTest(ImageProcessingSavingTestMixin, unittest self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.do_pad, False) - def test_batch_feature(self): - pass - - 
def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - 
self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - @slow def test_call_pytorch_with_coco_detection_annotations(self): # prepare image and target diff --git a/tests/models/deit/test_image_processing_deit.py b/tests/models/deit/test_image_processing_deit.py index 4103fc8fb25..21dc3d9e95a 100644 --- a/tests/models/deit/test_image_processing_deit.py +++ b/tests/models/deit/test_image_processing_deit.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from 
...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import DeiTImageProcessor @@ -78,10 +71,24 @@ class DeiTImageProcessingTester(unittest.TestCase): "image_std": self.image_std, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class DeiTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class DeiTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DeiTImageProcessor if is_vision_available() else None test_cast_dtype = True @@ -110,102 +117,3 @@ class DeiTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - 
self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - 
self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/deta/test_image_processing_deta.py b/tests/models/deta/test_image_processing_deta.py index b3e550fc4cf..7cde8474bf1 100644 --- a/tests/models/deta/test_image_processing_deta.py +++ b/tests/models/deta/test_image_processing_deta.py @@ -18,12 +18,10 @@ import json import pathlib import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -111,10 +109,25 @@ class DetaImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class DetaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class 
DetaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DetaImageProcessor if is_vision_available() else None def setUp(self): @@ -139,107 +152,6 @@ class DetaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) self.assertEqual(image_processor.size, {"shortest_edge": 18, "longest_edge": 1333}) self.assertEqual(image_processor.do_pad, True) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - 
expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - @slow def test_call_pytorch_with_coco_detection_annotations(self): # prepare image and target diff --git a/tests/models/detr/test_image_processing_detr.py b/tests/models/detr/test_image_processing_detr.py index f923cb6726f..2a095b259ec 100644 --- 
a/tests/models/detr/test_image_processing_detr.py +++ b/tests/models/detr/test_image_processing_detr.py @@ -18,12 +18,10 @@ import json import pathlib import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -111,10 +109,25 @@ class DetrImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class DetrImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DetrImageProcessor if is_vision_available() else None def setUp(self): @@ -146,107 +159,6 @@ class DetrImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.do_pad, False) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in 
image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = 
self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - @slow def test_call_pytorch_with_coco_detection_annotations(self): # prepare image and target diff --git a/tests/models/donut/test_image_processing_donut.py b/tests/models/donut/test_image_processing_donut.py index bd992626494..c1a2bd3b26e 100644 --- a/tests/models/donut/test_image_processing_donut.py +++ b/tests/models/donut/test_image_processing_donut.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import is_flaky, require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -78,10 +78,24 @@ class DonutImageProcessingTester(unittest.TestCase): "image_std": self.image_std, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], 
self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class DonutImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class DonutImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DonutImageProcessor if is_vision_available() else None def setUp(self): @@ -113,15 +127,12 @@ class DonutImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=(42, 84)) self.assertEqual(image_processor.size, {"height": 84, "width": 42}) - def test_batch_feature(self): - pass - @is_flaky() def test_call_pil(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) @@ -154,7 +165,7 @@ class DonutImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, numpify=True) for image in image_inputs: self.assertIsInstance(image, np.ndarray) @@ -187,7 +198,7 @@ class DonutImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase 
# Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) diff --git a/tests/models/dpt/test_image_processing_dpt.py b/tests/models/dpt/test_image_processing_dpt.py index 4cde4cbe73f..5670d50b913 100644 --- a/tests/models/dpt/test_image_processing_dpt.py +++ b/tests/models/dpt/test_image_processing_dpt.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - -from transformers.file_utils import is_torch_available, is_vision_available +from transformers.file_utils import is_vision_available from transformers.testing_utils import require_torch, require_vision -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import DPTImageProcessor @@ -70,10 +63,24 @@ class DPTImageProcessingTester(unittest.TestCase): "size": self.size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class DPTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): 
image_processing_class = DPTImageProcessor if is_vision_available() else None def setUp(self): @@ -97,99 +104,3 @@ class DPTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - 
) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) diff --git a/tests/models/efficientformer/test_image_processing_efficientformer.py b/tests/models/efficientformer/test_image_processing_efficientformer.py index ff2fcafd4b7..bd91b771061 100644 --- a/tests/models/efficientformer/test_image_processing_efficientformer.py +++ b/tests/models/efficientformer/test_image_processing_efficientformer.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import 
ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import ViTImageProcessor @@ -70,18 +63,32 @@ class EfficientFormerImageProcessorTester(unittest.TestCase): "size": self.size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class EfficientFormerImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class EfficientFormerImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ViTImageProcessor if is_vision_available() else None def setUp(self): - self.image_proc_tester = EfficientFormerImageProcessorTester(self) + self.image_processor_tester = EfficientFormerImageProcessorTester(self) @property def image_processor_dict(self): - return self.image_proc_tester.prepare_image_processor_dict() + return self.image_processor_tester.prepare_image_processor_dict() def test_image_proc_properties(self): image_processor = self.image_processing_class(**self.image_processor_dict) @@ -90,102 +97,3 @@ class EfficientFormerImageProcessorTest(ImageProcessingSavingTestMixin, unittest self.assertTrue(hasattr(image_processor, "do_normalize")) self.assertTrue(hasattr(image_processor, "do_resize")) self.assertTrue(hasattr(image_processor, "size")) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processor - image_processor = 
self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_proc_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_proc_tester.num_channels, - self.image_proc_tester.size["height"], - self.image_proc_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_proc_tester.batch_size, - self.image_proc_tester.num_channels, - self.image_proc_tester.size["height"], - self.image_proc_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_proc_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_proc_tester.num_channels, - self.image_proc_tester.size["height"], - self.image_proc_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_proc_tester.batch_size, - self.image_proc_tester.num_channels, - self.image_proc_tester.size["height"], - self.image_proc_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - 
image_inputs = prepare_image_inputs(self.image_proc_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_proc_tester.num_channels, - self.image_proc_tester.size["height"], - self.image_proc_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processor(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_proc_tester.batch_size, - self.image_proc_tester.num_channels, - self.image_proc_tester.size["height"], - self.image_proc_tester.size["width"], - ), - ) diff --git a/tests/models/efficientnet/test_image_processing_efficientnet.py b/tests/models/efficientnet/test_image_processing_efficientnet.py index bc65e7acbf1..fd754d8eb9e 100644 --- a/tests/models/efficientnet/test_image_processing_efficientnet.py +++ b/tests/models/efficientnet/test_image_processing_efficientnet.py @@ -19,17 +19,12 @@ import unittest import numpy as np from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import EfficientNetImageProcessor @@ -70,10 +65,24 @@ class EfficientNetImageProcessorTester(unittest.TestCase): "size": self.size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): 
+ return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class EfficientNetImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = EfficientNetImageProcessor if is_vision_available() else None def setUp(self): @@ -98,102 +107,6 @@ class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.Te image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = 
prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - def test_rescale(self): # EfficientNet optionally rescales between -1 and 1 instead of the usual 0 and 1 image = np.arange(0, 256, 1, dtype=np.uint8).reshape(1, 8, 32) diff --git 
a/tests/models/flava/test_image_processing_flava.py b/tests/models/flava/test_image_processing_flava.py index f9751725697..8e04abafe1f 100644 --- a/tests/models/flava/test_image_processing_flava.py +++ b/tests/models/flava/test_image_processing_flava.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -156,10 +156,21 @@ class FlavaImageProcessingTester(unittest.TestCase): def get_expected_codebook_image_size(self): return (self.codebook_size["height"], self.codebook_size["width"]) + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class FlavaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class FlavaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = FlavaImageProcessor if is_vision_available() else None maxDiff = None @@ -207,14 +218,11 @@ class FlavaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase self.assertEqual(image_processor.codebook_size, {"height": 33, "width": 33}) self.assertEqual(image_processor.codebook_crop_size, {"height": 66, "width": 66}) - def test_batch_feature(self): - pass - def test_call_pil(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, 
equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, PIL.Image.Image) @@ -252,7 +260,7 @@ class FlavaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, **prepare_kwargs) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, **prepare_kwargs) for image in image_inputs: self.assertIsInstance(image, instance_class) @@ -336,7 +344,7 @@ class FlavaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase # Initialize image_processing random.seed(1234) image_processing = self.image_processing_class(**self.image_processor_dict) - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) # Test not batched input encoded_images = image_processing(image_inputs[0], return_image_mask=True, return_tensors="pt") @@ -346,7 +354,7 @@ class FlavaImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, PIL.Image.Image) diff --git a/tests/models/glpn/test_image_processing_glpn.py b/tests/models/glpn/test_image_processing_glpn.py index dddc2807bc0..e9e6210252c 100644 --- a/tests/models/glpn/test_image_processing_glpn.py +++ 
b/tests/models/glpn/test_image_processing_glpn.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -63,10 +63,33 @@ class GLPNImageProcessingTester(unittest.TestCase): "do_rescale": self.do_rescale, } + def expected_output_image_shape(self, images): + if isinstance(images[0], Image.Image): + width, height = images[0].size + else: + height, width = images[0].shape[1], images[0].shape[2] + + height = height // self.size_divisor * self.size_divisor + width = width // self.size_divisor * self.size_divisor + + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + size_divisor=self.size_divisor, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class GLPNImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class GLPNImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = GLPNImageProcessor if is_vision_available() else None def setUp(self): @@ -83,44 +106,41 @@ class GLPNImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) self.assertTrue(hasattr(image_processing, "resample")) self.assertTrue(hasattr(image_processing, "do_rescale")) - def test_batch_feature(self): - pass - def test_call_pil(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = 
prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) # Test not batched input (GLPNImageProcessor doesn't support batching) encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertTrue(encoded_images.shape[-1] % self.image_processor_tester.size_divisor == 0) - self.assertTrue(encoded_images.shape[-2] % self.image_processor_tester.size_divisor == 0) + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + self.assertTrue(tuple(encoded_images.shape) == (1, *expected_output_image_shape)) def test_call_numpy(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, numpify=True) for image in image_inputs: self.assertIsInstance(image, np.ndarray) # Test not batched input (GLPNImageProcessor doesn't support batching) encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertTrue(encoded_images.shape[-1] % self.image_processor_tester.size_divisor == 0) - self.assertTrue(encoded_images.shape[-2] % self.image_processor_tester.size_divisor == 0) + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + self.assertTrue(tuple(encoded_images.shape) == (1, *expected_output_image_shape)) def test_call_pytorch(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + 
image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) # Test not batched input (GLPNImageProcessor doesn't support batching) encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertTrue(encoded_images.shape[-1] % self.image_processor_tester.size_divisor == 0) - self.assertTrue(encoded_images.shape[-2] % self.image_processor_tester.size_divisor == 0) + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + self.assertTrue(tuple(encoded_images.shape) == (1, *expected_output_image_shape)) diff --git a/tests/models/imagegpt/test_image_processing_imagegpt.py b/tests/models/imagegpt/test_image_processing_imagegpt.py index b0a2d5ceb0a..dd141be49bf 100644 --- a/tests/models/imagegpt/test_image_processing_imagegpt.py +++ b/tests/models/imagegpt/test_image_processing_imagegpt.py @@ -25,7 +25,7 @@ from datasets import load_dataset from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -75,10 +75,24 @@ class ImageGPTImageProcessingTester(unittest.TestCase): "do_normalize": self.do_normalize, } + def expected_output_image_shape(self, images): + return (self.size["height"] * self.size["width"],) + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class 
ImageGPTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ImageGPTImageProcessor if is_vision_available() else None def setUp(self): @@ -144,6 +158,68 @@ class ImageGPTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestC def test_init_without_params(self): pass + # Override the test from ImageProcessingTestMixin as ImageGPT model takes input_ids as input + def test_call_pil(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random PIL images + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) + for image in image_inputs: + self.assertIsInstance(image, Image.Image) + + # Test not batched input + encoded_images = image_processing(image_inputs[0], return_tensors="pt").input_ids + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(encoded_images) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + + # Test batched + encoded_images = image_processing(image_inputs, return_tensors="pt").input_ids + self.assertEqual( + tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) + ) + + # Override the test from ImageProcessingTestMixin as ImageGPT model takes input_ids as input + def test_call_numpy(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random numpy tensors + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, numpify=True) + for image in image_inputs: + self.assertIsInstance(image, np.ndarray) + + # Test not batched input + encoded_images = image_processing(image_inputs[0], return_tensors="pt").input_ids + expected_output_image_shape = 
self.image_processor_tester.expected_output_image_shape(encoded_images) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + + # Test batched + encoded_images = image_processing(image_inputs, return_tensors="pt").input_ids + self.assertEqual( + tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) + ) + + # Override the test from ImageProcessingTestMixin as ImageGPT model takes input_ids as input + def test_call_pytorch(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random PyTorch tensors + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + + for image in image_inputs: + self.assertIsInstance(image, torch.Tensor) + + # Test not batched input + encoded_images = image_processing(image_inputs[0], return_tensors="pt").input_ids + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + + # Test batched + encoded_images = image_processing(image_inputs, return_tensors="pt").input_ids + self.assertEqual( + tuple(encoded_images.shape), + (self.image_processor_tester.batch_size, *expected_output_image_shape), + ) + def prepare_images(): dataset = load_dataset("hf-internal-testing/fixtures_image_utils", split="test") diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py index 52bb80e14c9..b6200c3ee56 100644 --- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py @@ -16,17 +16,12 @@ import unittest -import numpy as np - from transformers.testing_utils import require_pytesseract, require_torch -from transformers.utils import is_pytesseract_available, is_torch_available +from 
transformers.utils import is_pytesseract_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_pytesseract_available(): from PIL import Image @@ -60,10 +55,24 @@ class LayoutLMv2ImageProcessingTester(unittest.TestCase): def prepare_image_processor_dict(self): return {"do_resize": self.do_resize, "size": self.size, "apply_ocr": self.apply_ocr} + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_pytesseract -class LayoutLMv2ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = LayoutLMv2ImageProcessor if is_pytesseract_available() else None def setUp(self): @@ -86,108 +95,6 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoding = 
image_processing(image_inputs[0], return_tensors="pt") - self.assertEqual( - encoding.pixel_values.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - self.assertIsInstance(encoding.words, list) - self.assertIsInstance(encoding.boxes, list) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in 
image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - def test_layoutlmv2_integration_test(self): # with apply_OCR = True image_processing = LayoutLMv2ImageProcessor() diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py index 8827cdeea23..9b19c376d90 100644 --- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py @@ -16,17 +16,12 @@ import unittest -import numpy as np - from transformers.testing_utils import require_pytesseract, require_torch -from transformers.utils import is_pytesseract_available, is_torch_available +from transformers.utils import is_pytesseract_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_pytesseract_available(): from PIL import Image @@ -60,10 +55,24 @@ class LayoutLMv3ImageProcessingTester(unittest.TestCase): def prepare_image_processor_dict(self): return {"do_resize": self.do_resize, "size": self.size, "apply_ocr": self.apply_ocr} + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], 
self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_pytesseract -class LayoutLMv3ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = LayoutLMv3ImageProcessor if is_pytesseract_available() else None def setUp(self): @@ -86,108 +95,6 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoding = image_processing(image_inputs[0], return_tensors="pt") - self.assertEqual( - encoding.pixel_values.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - self.assertIsInstance(encoding.words, list) - self.assertIsInstance(encoding.boxes, list) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - 
self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - 
self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - def test_LayoutLMv3_integration_test(self): # with apply_OCR = True image_processing = LayoutLMv3ImageProcessor() diff --git a/tests/models/levit/test_image_processing_levit.py b/tests/models/levit/test_image_processing_levit.py index 12d64c81771..756993c6b67 100644 --- a/tests/models/levit/test_image_processing_levit.py +++ b/tests/models/levit/test_image_processing_levit.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import LevitImageProcessor @@ -77,10 +70,24 @@ class LevitImageProcessingTester(unittest.TestCase): "crop_size": self.crop_size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class LevitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class LevitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = LevitImageProcessor if is_vision_available() else None def setUp(self): @@ -107,102 +114,3 @@ class 
LevitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - 
self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/mask2former/test_image_processing_mask2former.py b/tests/models/mask2former/test_image_processing_mask2former.py index 4ba6389b7f5..b3fe50164e5 100644 --- a/tests/models/mask2former/test_image_processing_mask2former.py +++ b/tests/models/mask2former/test_image_processing_mask2former.py @@ -23,7 +23,7 @@ from huggingface_hub import hf_hub_download from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import 
ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,10 +127,25 @@ class Mask2FormerImageProcessingTester(unittest.TestCase): masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), ) + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class Mask2FormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class Mask2FormerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = Mask2FormerImageProcessor if (is_vision_available() and is_torch_available()) else None def setUp(self): @@ -161,107 +176,6 @@ class Mask2FormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.size_divisor, 8) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = 
self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: 
- self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - def comm_get_image_processing_inputs( self, with_segmentation_maps=False, is_instance_map=False, segmentation_type="np" ): @@ -270,7 +184,7 @@ class Mask2FormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te num_labels = self.image_processor_tester.num_labels annotations = None instance_id_to_semantic_id = None - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) if with_segmentation_maps: high = num_labels if is_instance_map: @@ -292,9 +206,6 @@ class Mask2FormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te return inputs - def test_init_without_params(self): - pass - def test_with_size_divisor(self): size_divisors = [8, 16, 32] weird_input_sizes = [(407, 802), (582, 1094)] diff --git a/tests/models/maskformer/test_image_processing_maskformer.py b/tests/models/maskformer/test_image_processing_maskformer.py index 535582bc1f8..e7dc0077765 100644 --- a/tests/models/maskformer/test_image_processing_maskformer.py +++ b/tests/models/maskformer/test_image_processing_maskformer.py @@ -23,7 +23,7 @@ from huggingface_hub import 
hf_hub_download from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,10 +127,25 @@ class MaskFormerImageProcessingTester(unittest.TestCase): masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), ) + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class MaskFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class MaskFormerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = MaskFormerImageProcessor if (is_vision_available() and is_torch_available()) else None def setUp(self): @@ -161,107 +176,6 @@ class MaskFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.size_divisor, 8) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test 
not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch 
tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - def comm_get_image_processing_inputs( self, with_segmentation_maps=False, is_instance_map=False, segmentation_type="np" ): @@ -270,7 +184,7 @@ class MaskFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes num_labels = self.image_processor_tester.num_labels annotations = None instance_id_to_semantic_id = None - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) if with_segmentation_maps: high = num_labels if is_instance_map: @@ -292,9 +206,6 @@ class MaskFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes return inputs - def test_init_without_params(self): - pass - def test_with_size_divisor(self): size_divisors = [8, 16, 32] weird_input_sizes = [(407, 802), (582, 1094)] diff --git a/tests/models/mgp_str/test_processor_mgp_str.py b/tests/models/mgp_str/test_processor_mgp_str.py index 4cffc344a36..7c373b44736 100644 --- a/tests/models/mgp_str/test_processor_mgp_str.py +++ 
b/tests/models/mgp_str/test_processor_mgp_str.py @@ -46,7 +46,7 @@ class MgpstrProcessorTest(unittest.TestCase): @property def image_processor_dict(self): - return self.image_processor_tester.prepare_image_processor_dict() + return self.prepare_image_processor_dict() def setUp(self): self.image_size = (3, 32, 128) diff --git a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py index 51ca6f3b17c..ce0ecba34c0 100644 --- a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py +++ b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import MobileNetV1ImageProcessor @@ -68,10 +61,24 @@ class MobileNetV1ImageProcessingTester(unittest.TestCase): "crop_size": self.crop_size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class MobileNetV1ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class MobileNetV1ImageProcessingTest(ImageProcessingTestMixin, 
unittest.TestCase): image_processing_class = MobileNetV1ImageProcessor if is_vision_available() else None def setUp(self): @@ -96,102 +103,3 @@ class MobileNetV1ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - 
encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py index d5f148b21cd..4c94be47212 100644 --- a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py +++ b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, 
require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import MobileNetV2ImageProcessor @@ -68,10 +61,24 @@ class MobileNetV2ImageProcessingTester(unittest.TestCase): "crop_size": self.crop_size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class MobileNetV2ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class MobileNetV2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = MobileNetV2ImageProcessor if is_vision_available() else None def setUp(self): @@ -96,102 +103,3 @@ class MobileNetV2ImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, 
equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = 
prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/mobilevit/test_image_processing_mobilevit.py b/tests/models/mobilevit/test_image_processing_mobilevit.py index fbc72a2d9e0..edfdf0aed55 100644 --- a/tests/models/mobilevit/test_image_processing_mobilevit.py +++ b/tests/models/mobilevit/test_image_processing_mobilevit.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import MobileViTImageProcessor @@ -71,10 +64,24 @@ class MobileViTImageProcessingTester(unittest.TestCase): "do_flip_channel_order": self.do_flip_channel_order, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def 
prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class MobileViTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class MobileViTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = MobileViTImageProcessor if is_vision_available() else None def setUp(self): @@ -100,102 +107,3 @@ class MobileViTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Test image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - 
self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - 
self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/oneformer/test_image_processing_oneformer.py b/tests/models/oneformer/test_image_processing_oneformer.py index 864c803f354..6fa95f23414 100644 --- a/tests/models/oneformer/test_image_processing_oneformer.py +++ b/tests/models/oneformer/test_image_processing_oneformer.py @@ -23,7 +23,7 @@ from huggingface_hub import hf_hub_download from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -152,20 +152,35 @@ class OneFormerImageProcessorTester(unittest.TestCase): masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), ) + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class OneFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class OneFormerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = OneFormerImageProcessor if (is_vision_available() and is_torch_available()) else None # only for test_image_processing_common.test_image_proc_to_json_string image_processing_class = image_processing_class def setUp(self): - self.image_processing_tester = 
OneFormerImageProcessorTester(self) + self.image_processor_tester = OneFormerImageProcessorTester(self) @property def image_processor_dict(self): - return self.image_processing_tester.prepare_image_processor_dict() + return self.image_processor_tester.prepare_image_processor_dict() def test_image_proc_properties(self): image_processor = self.image_processing_class(**self.image_processor_dict) @@ -181,120 +196,15 @@ class OneFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Test self.assertTrue(hasattr(image_processor, "metadata")) self.assertTrue(hasattr(image_processor, "do_reduce_labels")) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processing_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], ["semantic"], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processing_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processing_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processing_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processor( - image_inputs, ["semantic"] * len(image_inputs), return_tensors="pt" - ).pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processing_tester.batch_size, - self.image_processing_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = 
prepare_image_inputs(self.image_processing_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], ["semantic"], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processing_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processing_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processing_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processor( - image_inputs, ["semantic"] * len(image_inputs), return_tensors="pt" - ).pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processing_tester.batch_size, - self.image_processing_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processing_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processor(image_inputs[0], ["semantic"], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processing_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processing_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processing_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processor( - image_inputs, ["semantic"] * len(image_inputs), return_tensors="pt" - ).pixel_values - self.assertEqual( - encoded_images.shape, - ( - 
self.image_processing_tester.batch_size, - self.image_processing_tester.num_channels, - expected_height, - expected_width, - ), - ) - def comm_get_image_processor_inputs( self, with_segmentation_maps=False, is_instance_map=False, segmentation_type="np" ): image_processor = self.image_processing_class(**self.image_processor_dict) # prepare image and target - num_labels = self.image_processing_tester.num_labels + num_labels = self.image_processor_tester.num_labels annotations = None instance_id_to_semantic_id = None - image_inputs = prepare_image_inputs(self.image_processing_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) if with_segmentation_maps: high = num_labels if is_instance_map: @@ -336,7 +246,7 @@ class OneFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Test self.assertEqual(mask_label.shape[0], class_label.shape[0]) # this ensure padding has happened self.assertEqual(mask_label.shape[1:], pixel_values.shape[2:]) - self.assertEqual(len(text_input), self.image_processing_tester.num_text) + self.assertEqual(len(text_input), self.image_processor_tester.num_text) common() common(is_instance_map=True) @@ -356,69 +266,69 @@ class OneFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Test def test_post_process_semantic_segmentation(self): fature_extractor = self.image_processing_class( - num_labels=self.image_processing_tester.num_classes, + num_labels=self.image_processor_tester.num_classes, max_seq_length=77, task_seq_length=77, class_info_file="ade20k_panoptic.json", - num_text=self.image_processing_tester.num_text, + num_text=self.image_processor_tester.num_text, repo_path="shi-labs/oneformer_demo", ) - outputs = self.image_processing_tester.get_fake_oneformer_outputs() + outputs = self.image_processor_tester.get_fake_oneformer_outputs() segmentation = fature_extractor.post_process_semantic_segmentation(outputs) - self.assertEqual(len(segmentation), 
self.image_processing_tester.batch_size) + self.assertEqual(len(segmentation), self.image_processor_tester.batch_size) self.assertEqual( segmentation[0].shape, ( - self.image_processing_tester.height, - self.image_processing_tester.width, + self.image_processor_tester.height, + self.image_processor_tester.width, ), ) - target_sizes = [(1, 4) for i in range(self.image_processing_tester.batch_size)] + target_sizes = [(1, 4) for i in range(self.image_processor_tester.batch_size)] segmentation = fature_extractor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes) self.assertEqual(segmentation[0].shape, target_sizes[0]) def test_post_process_instance_segmentation(self): image_processor = self.image_processing_class( - num_labels=self.image_processing_tester.num_classes, + num_labels=self.image_processor_tester.num_classes, max_seq_length=77, task_seq_length=77, class_info_file="ade20k_panoptic.json", - num_text=self.image_processing_tester.num_text, + num_text=self.image_processor_tester.num_text, repo_path="shi-labs/oneformer_demo", ) - outputs = self.image_processing_tester.get_fake_oneformer_outputs() + outputs = self.image_processor_tester.get_fake_oneformer_outputs() segmentation = image_processor.post_process_instance_segmentation(outputs, threshold=0) - self.assertTrue(len(segmentation) == self.image_processing_tester.batch_size) + self.assertTrue(len(segmentation) == self.image_processor_tester.batch_size) for el in segmentation: self.assertTrue("segmentation" in el) self.assertTrue("segments_info" in el) self.assertEqual(type(el["segments_info"]), list) self.assertEqual( - el["segmentation"].shape, (self.image_processing_tester.height, self.image_processing_tester.width) + el["segmentation"].shape, (self.image_processor_tester.height, self.image_processor_tester.width) ) def test_post_process_panoptic_segmentation(self): image_processor = self.image_processing_class( - num_labels=self.image_processing_tester.num_classes, + 
num_labels=self.image_processor_tester.num_classes, max_seq_length=77, task_seq_length=77, class_info_file="ade20k_panoptic.json", - num_text=self.image_processing_tester.num_text, + num_text=self.image_processor_tester.num_text, repo_path="shi-labs/oneformer_demo", ) - outputs = self.image_processing_tester.get_fake_oneformer_outputs() + outputs = self.image_processor_tester.get_fake_oneformer_outputs() segmentation = image_processor.post_process_panoptic_segmentation(outputs, threshold=0) - self.assertTrue(len(segmentation) == self.image_processing_tester.batch_size) + self.assertTrue(len(segmentation) == self.image_processor_tester.batch_size) for el in segmentation: self.assertTrue("segmentation" in el) self.assertTrue("segments_info" in el) self.assertEqual(type(el["segments_info"]), list) self.assertEqual( - el["segmentation"].shape, (self.image_processing_tester.height, self.image_processing_tester.width) + el["segmentation"].shape, (self.image_processor_tester.height, self.image_processor_tester.width) ) diff --git a/tests/models/oneformer/test_processor_oneformer.py b/tests/models/oneformer/test_processor_oneformer.py index c65807ca912..f6d97643810 100644 --- a/tests/models/oneformer/test_processor_oneformer.py +++ b/tests/models/oneformer/test_processor_oneformer.py @@ -174,6 +174,17 @@ class OneFormerProcessorTester(unittest.TestCase): masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), ) + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision @@ -203,7 +214,7 @@ class OneFormerProcessingTest(unittest.TestCase): # Initialize processor processor = self.processing_class(**self.processor_dict) # create 
random PIL images - image_inputs = prepare_image_inputs(self.processing_tester, equal_resolution=False) + image_inputs = self.processing_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) @@ -255,7 +266,7 @@ class OneFormerProcessingTest(unittest.TestCase): # Initialize processor processor = self.processing_class(**self.processor_dict) # create random numpy tensors - image_inputs = prepare_image_inputs(self.processing_tester, equal_resolution=False, numpify=True) + image_inputs = self.processing_tester.prepare_image_inputs(equal_resolution=False, numpify=True) for image in image_inputs: self.assertIsInstance(image, np.ndarray) @@ -307,7 +318,7 @@ class OneFormerProcessingTest(unittest.TestCase): # Initialize processor processor = self.processing_class(**self.processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.processing_tester, equal_resolution=False, torchify=True) + image_inputs = self.processing_tester.prepare_image_inputs(equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) @@ -361,7 +372,7 @@ class OneFormerProcessingTest(unittest.TestCase): num_labels = self.processing_tester.num_labels annotations = None instance_id_to_semantic_id = None - image_inputs = prepare_image_inputs(self.processing_tester, equal_resolution=False) + image_inputs = self.processing_tester.prepare_image_inputs(equal_resolution=False) if with_segmentation_maps: high = num_labels if is_instance_map: diff --git a/tests/models/owlvit/test_image_processing_owlvit.py b/tests/models/owlvit/test_image_processing_owlvit.py index 5a0afa38265..f4897c051ec 100644 --- a/tests/models/owlvit/test_image_processing_owlvit.py +++ b/tests/models/owlvit/test_image_processing_owlvit.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import 
is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import OwlViTImageProcessor @@ -78,10 +71,24 @@ class OwlViTImageProcessingTester(unittest.TestCase): "do_convert_rgb": self.do_convert_rgb, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class OwlViTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class OwlViTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = OwlViTImageProcessor if is_vision_available() else None def setUp(self): @@ -110,100 +117,3 @@ class OwlViTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCas image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test 
not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - 
self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/pix2struct/test_image_processing_pix2struct.py b/tests/models/pix2struct/test_image_processing_pix2struct.py index 51a4708a76b..5bf729a502e 100644 --- a/tests/models/pix2struct/test_image_processing_pix2struct.py +++ b/tests/models/pix2struct/test_image_processing_pix2struct.py @@ -22,7 +22,7 @@ import requests from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -73,6 +73,17 @@ class Pix2StructImageProcessingTester(unittest.TestCase): raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB") return raw_image + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @unittest.skipIf( not is_torch_greater_or_equal_than_1_11, @@ -80,7 +91,7 @@ 
class Pix2StructImageProcessingTester(unittest.TestCase): ) @require_torch @require_vision -class Pix2StructImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class Pix2StructImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = Pix2StructImageProcessor if is_vision_available() else None def setUp(self): @@ -108,7 +119,7 @@ class Pix2StructImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes # Initialize image_processor image_processor = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) @@ -141,7 +152,7 @@ class Pix2StructImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes # Initialize image_processor image_processor = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) @@ -183,7 +194,7 @@ class Pix2StructImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes # Initialize image_processor image_processor = self.image_processing_class(**self.image_processor_dict) # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, numpify=True) for image in image_inputs: self.assertIsInstance(image, np.ndarray) @@ -215,7 +226,7 @@ class Pix2StructImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes # Initialize image_processor image_processor = 
self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) @@ -251,7 +262,7 @@ class Pix2StructImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes ) @require_torch @require_vision -class Pix2StructImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase): +class Pix2StructImageProcessingTestFourChannels(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = Pix2StructImageProcessor if is_vision_available() else None def setUp(self): @@ -267,11 +278,11 @@ class Pix2StructImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, self.assertTrue(hasattr(image_processor, "do_normalize")) self.assertTrue(hasattr(image_processor, "do_convert_rgb")) - def test_call_pil_four_channels(self): + def test_call_pil(self): # Initialize image_processor image_processor = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) @@ -299,3 +310,11 @@ class Pix2StructImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, encoded_images.shape, (self.image_processor_tester.batch_size, max_patch, expected_hidden_dim), ) + + @unittest.skip("Pix2StructImageProcessor does not support 4 channels yet") # FIXME Amy + def test_call_numpy(self): + return super().test_call_numpy() + + @unittest.skip("Pix2StructImageProcessor does not support 4 channels yet") # FIXME Amy + def test_call_pytorch(self): + return super().test_call_pytorch() diff --git
a/tests/models/poolformer/test_image_processing_poolformer.py b/tests/models/poolformer/test_image_processing_poolformer.py index b6078fb5c5a..017a511c408 100644 --- a/tests/models/poolformer/test_image_processing_poolformer.py +++ b/tests/models/poolformer/test_image_processing_poolformer.py @@ -15,20 +15,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import PoolFormerImageProcessor @@ -74,10 +67,24 @@ class PoolFormerImageProcessingTester(unittest.TestCase): "image_std": self.image_std, } + def expected_output_image_shape(self, images): + return self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class PoolFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class PoolFormerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = PoolFormerImageProcessor if is_vision_available() else None def setUp(self): @@ -104,103 +111,3 @@ class PoolFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Tes image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42, crop_size=84) 
self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, 
- ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) diff --git a/tests/models/pvt/test_image_processing_pvt.py b/tests/models/pvt/test_image_processing_pvt.py index 726b0f9122f..d6b11313d81 100644 --- a/tests/models/pvt/test_image_processing_pvt.py +++ b/tests/models/pvt/test_image_processing_pvt.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL 
import Image - from transformers import PvtImageProcessor @@ -70,10 +63,24 @@ class PvtImageProcessingTester(unittest.TestCase): "size": self.size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class PvtImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class PvtImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = PvtImageProcessor if is_vision_available() else None def setUp(self): @@ -97,102 +104,3 @@ class PvtImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 
self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - 
encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) diff --git a/tests/models/segformer/test_image_processing_segformer.py b/tests/models/segformer/test_image_processing_segformer.py index 5559fae83b0..d84afdaa574 100644 --- a/tests/models/segformer/test_image_processing_segformer.py +++ b/tests/models/segformer/test_image_processing_segformer.py @@ -16,13 +16,12 @@ import unittest -import numpy as np from datasets import load_dataset from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -72,6 +71,20 @@ class SegformerImageProcessingTester(unittest.TestCase): "do_reduce_labels": self.do_reduce_labels, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + def prepare_semantic_single_inputs(): dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") @@ -95,7 +108,7 @@ def prepare_semantic_batch_inputs(): @require_torch @require_vision -class SegformerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class SegformerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = SegformerImageProcessor if is_vision_available() else None def 
setUp(self): @@ -123,110 +136,11 @@ class SegformerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Test self.assertEqual(image_processor.size, {"height": 42, "width": 42}) self.assertEqual(image_processor.do_reduce_labels, True) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, 
return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - def test_call_segmentation_maps(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) maps = [] for image in image_inputs: self.assertIsInstance(image, torch.Tensor) diff --git a/tests/models/swin2sr/test_image_processing_swin2sr.py b/tests/models/swin2sr/test_image_processing_swin2sr.py index 1cb19387ffb..8448062132d 100644 --- a/tests/models/swin2sr/test_image_processing_swin2sr.py +++ 
b/tests/models/swin2sr/test_image_processing_swin2sr.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -67,40 +67,34 @@ class Swin2SRImageProcessingTester(unittest.TestCase): "pad_size": self.pad_size, } - def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False): - """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True, - or a list of PyTorch tensors if one specifies torchify=True. - """ + def expected_output_image_shape(self, images): + img = images[0] - assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" - - if equal_resolution: - image_inputs = [] - for i in range(self.batch_size): - image_inputs.append( - np.random.randint( - 255, size=(self.num_channels, self.max_resolution, self.max_resolution), dtype=np.uint8 - ) - ) + if isinstance(img, Image.Image): + input_width, input_height = img.size else: - image_inputs = [] - for i in range(self.batch_size): - width, height = np.random.choice(np.arange(self.min_resolution, self.max_resolution), 2) - image_inputs.append(np.random.randint(255, size=(self.num_channels, width, height), dtype=np.uint8)) + input_height, input_width = img.shape[-2:] - if not numpify and not torchify: - # PIL expects the channel dimension as last dimension - image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs] + pad_height = (input_height // self.pad_size + 1) * self.pad_size - input_height + pad_width = (input_width // self.pad_size + 1) * self.pad_size - input_width - if torchify: - image_inputs = [torch.from_numpy(x) for x in image_inputs] + return self.num_channels, 
input_height + pad_height, input_width + pad_width - return image_inputs + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) @require_torch @require_vision -class Swin2SRImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class Swin2SRImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = Swin2SRImageProcessor if is_vision_available() else None def setUp(self): @@ -117,9 +111,6 @@ class Swin2SRImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCa self.assertTrue(hasattr(image_processor, "do_pad")) self.assertTrue(hasattr(image_processor, "pad_size")) - def test_batch_feature(self): - pass - def calculate_expected_size(self, image): old_height, old_width = get_image_size(image) size = self.image_processor_tester.pad_size @@ -128,65 +119,45 @@ class Swin2SRImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCa pad_width = (old_width // size + 1) * size - old_width return old_height + pad_height, old_width + pad_width + # Swin2SRImageProcessor does not support batched input def test_call_pil(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) # create random PIL images - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - 
expected_height, expected_width = self.calculate_expected_size(np.array(image_inputs[0])) - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) + encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape([image_inputs[0]]) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + # Swin2SRImageProcessor does not support batched input def test_call_numpy(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) # create random numpy tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, numpify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, numpify=True) for image in image_inputs: self.assertIsInstance(image, np.ndarray) # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - expected_height, expected_width = self.calculate_expected_size(image_inputs[0]) - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) + encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape([image_inputs[0]]) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + # Swin2SRImageProcessor does not support batched input def test_call_pytorch(self): - # Initialize image_processor - image_processor = self.image_processing_class(**self.image_processor_dict) + # Initialize image_processing + image_processing = 
self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = self.image_processor_tester.prepare_inputs(equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) + for image in image_inputs: self.assertIsInstance(image, torch.Tensor) # Test not batched input - encoded_images = image_processor(image_inputs[0], return_tensors="pt").pixel_values - expected_height, expected_width = self.calculate_expected_size(image_inputs[0]) - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) + encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape([image_inputs[0]]) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) diff --git a/tests/models/tvlt/test_image_processor_tvlt.py b/tests/models/tvlt/test_image_processor_tvlt.py index e31d7a2a17e..6e5c1c4c868 100644 --- a/tests/models/tvlt/test_image_processor_tvlt.py +++ b/tests/models/tvlt/test_image_processor_tvlt.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin +from ...test_image_processing_common import ImageProcessingTestMixin if is_torch_available(): @@ -128,7 +128,7 @@ class TvltImageProcessorTester(unittest.TestCase): @require_torch @require_vision -class TvltImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class TvltImageProcessorTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = TvltImageProcessor if is_vision_available() else None def setUp(self): diff --git 
a/tests/models/videomae/test_image_processing_videomae.py b/tests/models/videomae/test_image_processing_videomae.py index 70980c195cf..140942fd135 100644 --- a/tests/models/videomae/test_image_processing_videomae.py +++ b/tests/models/videomae/test_image_processing_videomae.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_video_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_video_inputs if is_torch_available(): @@ -77,10 +77,25 @@ class VideoMAEImageProcessingTester(unittest.TestCase): "crop_size": self.crop_size, } + def expected_output_image_shape(self, images): + return self.num_frames, self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_video_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_video_inputs( + batch_size=self.batch_size, + num_frames=self.num_frames, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class VideoMAEImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class VideoMAEImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = VideoMAEImageProcessor if is_vision_available() else None def setUp(self): @@ -108,110 +123,65 @@ class VideoMAEImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestC self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - def test_batch_feature(self): - pass - def test_call_pil(self): # Initialize image_processing image_processing = 
self.image_processing_class(**self.image_processor_dict) # create random PIL videos - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False) + video_inputs = self.image_processor_tester.prepare_video_inputs(equal_resolution=False) for video in video_inputs: self.assertIsInstance(video, list) self.assertIsInstance(video[0], Image.Image) # Test not batched input encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape([encoded_videos[0]]) + self.assertEqual(tuple(encoded_videos.shape), (1, *expected_output_video_shape)) # Test batched encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape(encoded_videos) self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), + tuple(encoded_videos.shape), (self.image_processor_tester.batch_size, *expected_output_video_shape) ) def test_call_numpy(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random numpy tensors - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) + video_inputs = self.image_processor_tester.prepare_video_inputs(equal_resolution=False, numpify=True) for video in video_inputs: self.assertIsInstance(video, list) self.assertIsInstance(video[0], np.ndarray) # Test not 
batched input encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape([encoded_videos[0]]) + self.assertEqual(tuple(encoded_videos.shape), (1, *expected_output_video_shape)) # Test batched encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape(encoded_videos) self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), + tuple(encoded_videos.shape), (self.image_processor_tester.batch_size, *expected_output_video_shape) ) def test_call_pytorch(self): # Initialize image_processing image_processing = self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + video_inputs = self.image_processor_tester.prepare_video_inputs(equal_resolution=False, torchify=True) for video in video_inputs: self.assertIsInstance(video, list) self.assertIsInstance(video[0], torch.Tensor) # Test not batched input encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) + expected_output_video_shape 
= self.image_processor_tester.expected_output_image_shape([encoded_videos[0]]) + self.assertEqual(tuple(encoded_videos.shape), (1, *expected_output_video_shape)) # Test batched encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape(encoded_videos) self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), + tuple(encoded_videos.shape), (self.image_processor_tester.batch_size, *expected_output_video_shape) ) diff --git a/tests/models/vilt/test_image_processing_vilt.py b/tests/models/vilt/test_image_processing_vilt.py index 28cf9f2fe6b..607a8b929d1 100644 --- a/tests/models/vilt/test_image_processing_vilt.py +++ b/tests/models/vilt/test_image_processing_vilt.py @@ -16,17 +16,12 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): from PIL import Image @@ -113,10 +108,25 @@ class ViltImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return (self.num_channels, height, width) + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + 
min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class ViltImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class ViltImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ViltImageProcessor if is_vision_available() else None def setUp(self): @@ -141,99 +151,3 @@ class ViltImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase) image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"shortest_edge": 42}) - - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - 
image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - 
expected_width, - ), - ) diff --git a/tests/models/vit/test_image_processing_vit.py b/tests/models/vit/test_image_processing_vit.py index 171ce65e74f..c1c22c0a800 100644 --- a/tests/models/vit/test_image_processing_vit.py +++ b/tests/models/vit/test_image_processing_vit.py @@ -16,20 +16,13 @@ import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_torch_available, is_vision_available +from transformers.utils import is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs -if is_torch_available(): - import torch - if is_vision_available(): - from PIL import Image - from transformers import ViTImageProcessor @@ -70,10 +63,24 @@ class ViTImageProcessingTester(unittest.TestCase): "size": self.size, } + def expected_output_image_shape(self, images): + return self.num_channels, self.size["height"], self.size["width"] + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class ViTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class ViTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ViTImageProcessor if is_vision_available() else None def setUp(self): @@ -97,102 +104,3 @@ class ViTImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) self.assertEqual(image_processor.size, {"height": 42, "width": 42}) - - def test_batch_feature(self): - pass - - 
def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - def 
test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - 1, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - self.image_processor_tester.size["height"], - self.image_processor_tester.size["width"], - ), - ) diff --git a/tests/models/vivit/test_image_processing_vivit.py b/tests/models/vivit/test_image_processing_vivit.py index 69547347487..d901a86198a 100644 --- a/tests/models/vivit/test_image_processing_vivit.py +++ b/tests/models/vivit/test_image_processing_vivit.py @@ -21,7 +21,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_video_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_video_inputs if is_torch_available(): @@ -77,10 +77,25 @@ class VivitImageProcessingTester(unittest.TestCase): "crop_size": self.crop_size, } + def expected_output_image_shape(self, images): + return self.num_frames, self.num_channels, self.crop_size["height"], self.crop_size["width"] + + def prepare_video_inputs(self, equal_resolution=False, numpify=False, torchify=False): + 
return prepare_video_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + num_frames=self.num_frames, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class VivitImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = VivitImageProcessor if is_vision_available() else None def setUp(self): @@ -108,111 +123,6 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase self.assertEqual(image_processor.size, {"shortest_edge": 42}) self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84}) - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL videos - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], Image.Image) - - # Test not batched input - encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - 
image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], np.ndarray) - - # Test not batched input - encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PyTorch tensors - video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for video in video_inputs: - self.assertIsInstance(video, list) - self.assertIsInstance(video[0], torch.Tensor) - - # Test not batched input - encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 1, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - - # Test batched - encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_videos.shape, - ( - 
self.image_processor_tester.batch_size, - self.image_processor_tester.num_frames, - self.image_processor_tester.num_channels, - self.image_processor_tester.crop_size["height"], - self.image_processor_tester.crop_size["width"], - ), - ) - def test_rescale(self): # ViVit optionally rescales between -1 and 1 instead of the usual 0 and 1 image = np.arange(0, 256, 1, dtype=np.uint8).reshape(1, 8, 32) @@ -226,3 +136,66 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False) expected_image = (image / 255.0).astype(np.float32) self.assertTrue(np.allclose(rescaled_image, expected_image)) + + def test_call_pil(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random PIL videos + video_inputs = self.image_processor_tester.prepare_video_inputs(equal_resolution=False) + for video in video_inputs: + self.assertIsInstance(video, list) + self.assertIsInstance(video[0], Image.Image) + + # Test not batched input + encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape([encoded_videos[0]]) + self.assertEqual(tuple(encoded_videos.shape), (1, *expected_output_video_shape)) + + # Test batched + encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape(encoded_videos) + self.assertEqual( + tuple(encoded_videos.shape), (self.image_processor_tester.batch_size, *expected_output_video_shape) + ) + + def test_call_numpy(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random numpy tensors + video_inputs = self.image_processor_tester.prepare_video_inputs(equal_resolution=False, numpify=True) + for video in 
video_inputs: + self.assertIsInstance(video, list) + self.assertIsInstance(video[0], np.ndarray) + + # Test not batched input + encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape([encoded_videos[0]]) + self.assertEqual(tuple(encoded_videos.shape), (1, *expected_output_video_shape)) + + # Test batched + encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape(encoded_videos) + self.assertEqual( + tuple(encoded_videos.shape), (self.image_processor_tester.batch_size, *expected_output_video_shape) + ) + + def test_call_pytorch(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random PyTorch tensors + video_inputs = self.image_processor_tester.prepare_video_inputs(equal_resolution=False, torchify=True) + for video in video_inputs: + self.assertIsInstance(video, list) + self.assertIsInstance(video[0], torch.Tensor) + + # Test not batched input + encoded_videos = image_processing(video_inputs[0], return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape([encoded_videos[0]]) + self.assertEqual(tuple(encoded_videos.shape), (1, *expected_output_video_shape)) + + # Test batched + encoded_videos = image_processing(video_inputs, return_tensors="pt").pixel_values + expected_output_video_shape = self.image_processor_tester.expected_output_image_shape(encoded_videos) + self.assertEqual( + tuple(encoded_videos.shape), (self.image_processor_tester.batch_size, *expected_output_video_shape) + ) diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py index 937cb6fac6e..003a0061105 100644 --- a/tests/models/yolos/test_image_processing_yolos.py +++ 
b/tests/models/yolos/test_image_processing_yolos.py @@ -18,12 +18,10 @@ import json import pathlib import unittest -import numpy as np - from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs +from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -111,10 +109,25 @@ class YolosImageProcessingTester(unittest.TestCase): return expected_height, expected_width + def expected_output_image_shape(self, images): + height, width = self.get_expected_values(images, batched=True) + return self.num_channels, height, width + + def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False): + return prepare_image_inputs( + batch_size=self.batch_size, + num_channels=self.num_channels, + min_resolution=self.min_resolution, + max_resolution=self.max_resolution, + equal_resolution=equal_resolution, + numpify=numpify, + torchify=torchify, + ) + @require_torch @require_vision -class YolosImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase): +class YolosImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = YolosImageProcessor if is_vision_available() else None def setUp(self): @@ -143,113 +156,12 @@ class YolosImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.do_pad, False) - def test_batch_feature(self): - pass - - def test_call_pil(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random PIL images - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False) - for image in image_inputs: - self.assertIsInstance(image, 
Image.Image) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_numpy(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - # create random numpy tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True) - for image in image_inputs: - self.assertIsInstance(image, np.ndarray) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - - def test_call_pytorch(self): - # Initialize image_processing - image_processing = self.image_processing_class(**self.image_processor_dict) - 
# create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) - for image in image_inputs: - self.assertIsInstance(image, torch.Tensor) - - # Test not batched input - encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs) - - self.assertEqual( - encoded_images.shape, - (1, self.image_processor_tester.num_channels, expected_height, expected_width), - ) - - # Test batched - encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values - - expected_height, expected_width = self.image_processor_tester.get_expected_values(image_inputs, batched=True) - - self.assertEqual( - encoded_images.shape, - ( - self.image_processor_tester.batch_size, - self.image_processor_tester.num_channels, - expected_height, - expected_width, - ), - ) - def test_equivalence_padding(self): # Initialize image_processings image_processing_1 = self.image_processing_class(**self.image_processor_dict) image_processing_2 = self.image_processing_class(do_resize=False, do_normalize=False, do_rescale=False) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py index 166440c4d52..2eb360c4a14 100644 --- a/tests/test_image_processing_common.py +++ b/tests/test_image_processing_common.py @@ -29,7 +29,16 @@ if is_vision_available(): from PIL import Image -def prepare_image_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False): +def prepare_image_inputs( + batch_size, + min_resolution, + max_resolution, + 
num_channels, + size_divisor=None, + equal_resolution=False, + numpify=False, + torchify=False, +): """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True, or a list of PyTorch tensors if one specifies torchify=True. @@ -39,19 +48,16 @@ def prepare_image_inputs(image_processor_tester, equal_resolution=False, numpify assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" image_inputs = [] - for i in range(image_processor_tester.batch_size): + for i in range(batch_size): if equal_resolution: - width = height = image_processor_tester.max_resolution + width = height = max_resolution else: # To avoid getting image width/height 0 - min_resolution = image_processor_tester.min_resolution - if getattr(image_processor_tester, "size_divisor", None): + if size_divisor is not None: # If `size_divisor` is defined, the image needs to have width/size >= `size_divisor` - min_resolution = max(image_processor_tester.size_divisor, min_resolution) - width, height = np.random.choice(np.arange(min_resolution, image_processor_tester.max_resolution), 2) - image_inputs.append( - np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8) - ) + min_resolution = max(size_divisor, min_resolution) + width, height = np.random.choice(np.arange(min_resolution, max_resolution), 2) + image_inputs.append(np.random.randint(255, size=(num_channels, width, height), dtype=np.uint8)) if not numpify and not torchify: # PIL expects the channel dimension as last dimension @@ -63,12 +69,12 @@ def prepare_image_inputs(image_processor_tester, equal_resolution=False, numpify return image_inputs -def prepare_video(image_processor_tester, width=10, height=10, numpify=False, torchify=False): +def prepare_video(num_frames, num_channels, width=10, height=10, numpify=False, torchify=False): """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors.""" 
video = [] - for i in range(image_processor_tester.num_frames): - video.append(np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8)) + for i in range(num_frames): + video.append(np.random.randint(255, size=(num_channels, width, height), dtype=np.uint8)) if not numpify and not torchify: # PIL expects the channel dimension as last dimension @@ -80,7 +86,16 @@ def prepare_video(image_processor_tester, width=10, height=10, numpify=False, to return video -def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False): +def prepare_video_inputs( + batch_size, + num_frames, + num_channels, + min_resolution, + max_resolution, + equal_resolution=False, + numpify=False, + torchify=False, +): """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True. @@ -90,15 +105,14 @@ def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" video_inputs = [] - for i in range(image_processor_tester.batch_size): + for i in range(batch_size): if equal_resolution: - width = height = image_processor_tester.max_resolution + width = height = max_resolution else: - width, height = np.random.choice( - np.arange(image_processor_tester.min_resolution, image_processor_tester.max_resolution), 2 - ) + width, height = np.random.choice(np.arange(min_resolution, max_resolution), 2) video = prepare_video( - image_processor_tester=image_processor_tester, + num_frames=num_frames, + num_channels=num_channels, width=width, height=height, numpify=numpify, @@ -109,7 +123,7 @@ def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify return video_inputs -class ImageProcessingSavingTestMixin: +class ImageProcessingTestMixin: 
test_cast_dtype = None def test_image_processor_to_json_string(self): @@ -150,7 +164,7 @@ class ImageProcessingSavingTestMixin: image_processor = self.image_processing_class(**self.image_processor_dict) # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) encoding = image_processor(image_inputs, return_tensors="pt") # for layoutLM compatiblity @@ -176,3 +190,65 @@ class ImageProcessingSavingTestMixin: self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) self.assertEqual(encoding.pixel_values.dtype, torch.float16) self.assertEqual(encoding.input_ids.dtype, torch.long) + + def test_call_pil(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random PIL images + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False) + for image in image_inputs: + self.assertIsInstance(image, Image.Image) + + # Test not batched input + encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape([image_inputs[0]]) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + + # Test batched + encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + self.assertEqual( + tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) + ) + + def test_call_numpy(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random numpy tensors + image_inputs = 
self.image_processor_tester.prepare_image_inputs(equal_resolution=False, numpify=True) + for image in image_inputs: + self.assertIsInstance(image, np.ndarray) + + # Test not batched input + encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape([image_inputs[0]]) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + + # Test batched + encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + self.assertEqual( + tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) + ) + + def test_call_pytorch(self): + # Initialize image_processing + image_processing = self.image_processing_class(**self.image_processor_dict) + # create random PyTorch tensors + image_inputs = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True) + + for image in image_inputs: + self.assertIsInstance(image, torch.Tensor) + + # Test not batched input + encoded_images = image_processing(image_inputs[0], return_tensors="pt").pixel_values + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape([image_inputs[0]]) + self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape)) + + # Test batched + expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs) + encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values + self.assertEqual( + tuple(encoded_images.shape), + (self.image_processor_tester.batch_size, *expected_output_image_shape), + )