diff --git a/tests/models/beit/test_image_processing_beit.py b/tests/models/beit/test_image_processing_beit.py
index 545b4d79a9e..b499f008457 100644
--- a/tests/models/beit/test_image_processing_beit.py
+++ b/tests/models/beit/test_image_processing_beit.py
@@ -22,7 +22,8 @@ from datasets import load_dataset
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/conditional_detr/test_image_processing_conditional_detr.py b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
index 4f3a6e21e0c..b4e6f46d3e9 100644
--- a/tests/models/conditional_detr/test_image_processing_conditional_detr.py
+++ b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
@@ -23,7 +23,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/convnext/test_image_processing_convnext.py b/tests/models/convnext/test_image_processing_convnext.py
index 9777c3df6d0..4fd62fc51d1 100644
--- a/tests/models/convnext/test_image_processing_convnext.py
+++ b/tests/models/convnext/test_image_processing_convnext.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/deformable_detr/test_image_processing_deformable_detr.py b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
index aaafb7ff2f2..bc636895394 100644
--- a/tests/models/deformable_detr/test_image_processing_deformable_detr.py
+++ b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
@@ -23,7 +23,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/deit/test_image_processing_deit.py b/tests/models/deit/test_image_processing_deit.py
index f684008ccc3..db1e42f7710 100644
--- a/tests/models/deit/test_image_processing_deit.py
+++ b/tests/models/deit/test_image_processing_deit.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/detr/test_image_processing_detr.py b/tests/models/detr/test_image_processing_detr.py
index 6aafd62da4b..253ffb7c297 100644
--- a/tests/models/detr/test_image_processing_detr.py
+++ b/tests/models/detr/test_image_processing_detr.py
@@ -23,7 +23,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/donut/test_image_processing_donut.py b/tests/models/donut/test_image_processing_donut.py
index 4d0f88ac988..550d166e460 100644
--- a/tests/models/donut/test_image_processing_donut.py
+++ b/tests/models/donut/test_image_processing_donut.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import is_flaky, require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/dpt/test_image_processing_dpt.py b/tests/models/dpt/test_image_processing_dpt.py
index 594b1451a74..0bbeb173e59 100644
--- a/tests/models/dpt/test_image_processing_dpt.py
+++ b/tests/models/dpt/test_image_processing_dpt.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.testing_utils import require_torch, require_vision
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/efficientformer/test_image_processing_efficientformer.py b/tests/models/efficientformer/test_image_processing_efficientformer.py
index c672b15be18..0a525505646 100644
--- a/tests/models/efficientformer/test_image_processing_efficientformer.py
+++ b/tests/models/efficientformer/test_image_processing_efficientformer.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/flava/test_image_processing_flava.py b/tests/models/flava/test_image_processing_flava.py
index ba6379e6b34..28718748200 100644
--- a/tests/models/flava/test_image_processing_flava.py
+++ b/tests/models/flava/test_image_processing_flava.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/glpn/test_image_processing_glpn.py b/tests/models/glpn/test_image_processing_glpn.py
index 4e7f2bdf5c7..31e52776177 100644
--- a/tests/models/glpn/test_image_processing_glpn.py
+++ b/tests/models/glpn/test_image_processing_glpn.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
index c26eaac16eb..4423d33376e 100644
--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
index c8eb976bf58..829fc8d79dd 100644
--- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/levit/test_image_processing_levit.py b/tests/models/levit/test_image_processing_levit.py
index 2b1472d9b62..76f3c66e1ad 100644
--- a/tests/models/levit/test_image_processing_levit.py
+++ b/tests/models/levit/test_image_processing_levit.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/maskformer/test_image_processing_maskformer.py b/tests/models/maskformer/test_image_processing_maskformer.py
index 2036d9f7d28..f8ddf8c9dc0 100644
--- a/tests/models/maskformer/test_image_processing_maskformer.py
+++ b/tests/models/maskformer/test_image_processing_maskformer.py
@@ -23,7 +23,8 @@ from huggingface_hub import hf_hub_download
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
index 270d38d5b81..383f91c554f 100644
--- a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
+++ b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
index 3cb4eea2184..e207932e38e 100644
--- a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
+++ b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/mobilevit/test_image_processing_mobilevit.py b/tests/models/mobilevit/test_image_processing_mobilevit.py
index 468c4689e4d..a22fc2c1d54 100644
--- a/tests/models/mobilevit/test_image_processing_mobilevit.py
+++ b/tests/models/mobilevit/test_image_processing_mobilevit.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/oneformer/test_image_processing_oneformer.py b/tests/models/oneformer/test_image_processing_oneformer.py
index f34ae080cf9..79c6d82c3f4 100644
--- a/tests/models/oneformer/test_image_processing_oneformer.py
+++ b/tests/models/oneformer/test_image_processing_oneformer.py
@@ -23,7 +23,8 @@ from huggingface_hub import hf_hub_download
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/oneformer/test_processor_oneformer.py b/tests/models/oneformer/test_processor_oneformer.py
index 72d940df8b1..5056d682832 100644
--- a/tests/models/oneformer/test_processor_oneformer.py
+++ b/tests/models/oneformer/test_processor_oneformer.py
@@ -26,7 +26,7 @@ from huggingface_hub import hf_hub_download
 from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import prepare_image_inputs
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/owlvit/test_image_processing_owlvit.py b/tests/models/owlvit/test_image_processing_owlvit.py
index fe259b11696..bf2cd8d666d 100644
--- a/tests/models/owlvit/test_image_processing_owlvit.py
+++ b/tests/models/owlvit/test_image_processing_owlvit.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/poolformer/test_image_processing_poolformer.py b/tests/models/poolformer/test_image_processing_poolformer.py
index b1fffe8a5a7..47e583a3211 100644
--- a/tests/models/poolformer/test_image_processing_poolformer.py
+++ b/tests/models/poolformer/test_image_processing_poolformer.py
@@ -20,7 +20,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/segformer/test_image_processing_segformer.py b/tests/models/segformer/test_image_processing_segformer.py
index 4257b27b814..a104fc2f483 100644
--- a/tests/models/segformer/test_image_processing_segformer.py
+++ b/tests/models/segformer/test_image_processing_segformer.py
@@ -22,7 +22,8 @@ from datasets import load_dataset
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/videomae/test_image_processing_videomae.py b/tests/models/videomae/test_image_processing_videomae.py
index f792a9be844..025c39ef97f 100644
--- a/tests/models/videomae/test_image_processing_videomae.py
+++ b/tests/models/videomae/test_image_processing_videomae.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_video_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_video_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/vilt/test_image_processing_vilt.py b/tests/models/vilt/test_image_processing_vilt.py
index 5816eacf835..5d7be90a747 100644
--- a/tests/models/vilt/test_image_processing_vilt.py
+++ b/tests/models/vilt/test_image_processing_vilt.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/vit/test_image_processing_vit.py b/tests/models/vit/test_image_processing_vit.py
index f4197425099..a0db60887e4 100644
--- a/tests/models/vit/test_image_processing_vit.py
+++ b/tests/models/vit/test_image_processing_vit.py
@@ -21,7 +21,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py
index 2c1571d7f7d..4e22baa4d66 100644
--- a/tests/models/yolos/test_image_processing_yolos.py
+++ b/tests/models/yolos/test_image_processing_yolos.py
@@ -23,7 +23,8 @@ import numpy as np
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/test_feature_extraction_common.py b/tests/test_feature_extraction_common.py
index fe8d0248064..5c60cf58ac2 100644
--- a/tests/test_feature_extraction_common.py
+++ b/tests/test_feature_extraction_common.py
@@ -25,16 +25,7 @@ from pathlib import Path
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
-from transformers.testing_utils import (
-    TOKEN,
-    USER,
-    check_json_file_has_correct_format,
-    get_tests_dir,
-    is_staging_test,
-    require_torch,
-    require_vision,
-)
-from transformers.utils import is_torch_available, is_vision_available
+from transformers.testing_utils import TOKEN, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test
 
 
 sys.path.append(str(Path(__file__).parent.parent / "utils"))
@@ -42,105 +33,9 @@ sys.path.append(str(Path(__file__).parent.parent / "utils"))
 
 from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402
 
 
-if is_torch_available():
-    import numpy as np
-    import torch
-
-if is_vision_available():
-    from PIL import Image
-
-
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")
 
 
-def prepare_image_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
-    """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-    or a list of PyTorch tensors if one specifies torchify=True.
-
-    One can specify whether the images are of the same resolution or not.
-    """
-
-    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
-
-    image_inputs = []
-    for i in range(feature_extract_tester.batch_size):
-        if equal_resolution:
-            width = height = feature_extract_tester.max_resolution
-        else:
-            # To avoid getting image width/height 0
-            min_resolution = feature_extract_tester.min_resolution
-            if getattr(feature_extract_tester, "size_divisor", None):
-                # If `size_divisor` is defined, the image needs to have width/size >= `size_divisor`
-                min_resolution = max(feature_extract_tester.size_divisor, min_resolution)
-            width, height = np.random.choice(np.arange(min_resolution, feature_extract_tester.max_resolution), 2)
-        image_inputs.append(
-            np.random.randint(
-                255,
-                size=(
-                    feature_extract_tester.num_channels,
-                    width,
-                    height,
-                ),
-                dtype=np.uint8,
-            )
-        )
-
-    if not numpify and not torchify:
-        # PIL expects the channel dimension as last dimension
-        image_inputs = [Image.fromarray(np.moveaxis(image, 0, -1)) for image in image_inputs]
-
-    if torchify:
-        image_inputs = [torch.from_numpy(image) for image in image_inputs]
-
-    return image_inputs
-
-
-def prepare_video(feature_extract_tester, width=10, height=10, numpify=False, torchify=False):
-    """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors."""
-
-    video = []
-    for i in range(feature_extract_tester.num_frames):
-        video.append(np.random.randint(255, size=(feature_extract_tester.num_channels, width, height), dtype=np.uint8))
-
-    if not numpify and not torchify:
-        # PIL expects the channel dimension as last dimension
-        video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video]
-
-    if torchify:
-        video = [torch.from_numpy(frame) for frame in video]
-
-    return video
-
-
-def prepare_video_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
-    """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if
-    one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True.
-
-    One can specify whether the videos are of the same resolution or not.
- """ - - assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" - - video_inputs = [] - for i in range(feature_extract_tester.batch_size): - if equal_resolution: - width = height = feature_extract_tester.max_resolution - else: - width, height = np.random.choice( - np.arange(feature_extract_tester.min_resolution, feature_extract_tester.max_resolution), 2 - ) - video = prepare_video( - feature_extract_tester=feature_extract_tester, - width=width, - height=height, - numpify=numpify, - torchify=torchify, - ) - video_inputs.append(video) - - return video_inputs - - class FeatureExtractionSavingTestMixin: test_cast_dtype = None @@ -174,41 +69,6 @@ class FeatureExtractionSavingTestMixin: feat_extract = self.feature_extraction_class() self.assertIsNotNone(feat_extract) - @require_torch - @require_vision - def test_cast_dtype_device(self): - if self.test_cast_dtype is not None: - # Initialize feature_extractor - feature_extractor = self.feature_extraction_class(**self.feat_extract_dict) - - # create random PyTorch tensors - image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True) - - encoding = feature_extractor(image_inputs, return_tensors="pt") - # for layoutLM compatiblity - self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) - self.assertEqual(encoding.pixel_values.dtype, torch.float32) - - encoding = feature_extractor(image_inputs, return_tensors="pt").to(torch.float16) - self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) - self.assertEqual(encoding.pixel_values.dtype, torch.float16) - - encoding = feature_extractor(image_inputs, return_tensors="pt").to("cpu", torch.bfloat16) - self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) - self.assertEqual(encoding.pixel_values.dtype, torch.bfloat16) - - with self.assertRaises(TypeError): - _ = feature_extractor(image_inputs, return_tensors="pt").to(torch.bfloat16, "cpu") - - # Try with text + image feature - encoding = feature_extractor(image_inputs, return_tensors="pt") - encoding.update({"input_ids": torch.LongTensor([[1, 2, 3], [4, 5, 6]])}) - encoding = encoding.to(torch.float16) - - self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) - self.assertEqual(encoding.pixel_values.dtype, torch.float16) - self.assertEqual(encoding.input_ids.dtype, torch.long) - class FeatureExtractorUtilTester(unittest.TestCase): def test_cached_files_are_used_when_internet_is_down(self): diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py new file mode 100644 index 00000000000..d8485e3853d --- /dev/null +++ b/tests/test_image_processing_common.py @@ -0,0 +1,312 @@ +# coding=utf-8 +# Copyright 2023 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+import json
+import os
+import sys
+import tempfile
+import unittest
+import unittest.mock as mock
+from pathlib import Path
+
+from huggingface_hub import HfFolder, delete_repo, set_access_token
+from requests.exceptions import HTTPError
+from transformers import AutoImageProcessor, ViTImageProcessor
+from transformers.testing_utils import (
+    TOKEN,
+    USER,
+    check_json_file_has_correct_format,
+    get_tests_dir,
+    is_staging_test,
+    require_torch,
+    require_vision,
+)
+from transformers.utils import is_torch_available, is_vision_available
+
+
+sys.path.append(str(Path(__file__).parent.parent / "utils"))
+
+from test_module.custom_image_processing import CustomImageProcessor  # noqa E402
+
+
+if is_torch_available():
+    import numpy as np
+    import torch
+
+if is_vision_available():
+    from PIL import Image
+
+
+SAMPLE_IMAGE_PROCESSING_CONFIG_DIR = get_tests_dir("fixtures")
+
+
+def prepare_image_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False):
+    """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
+    or a list of PyTorch tensors if one specifies torchify=True.
+
+    One can specify whether the images are of the same resolution or not.
+    """
+
+    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
+
+    image_inputs = []
+    for i in range(image_processor_tester.batch_size):
+        if equal_resolution:
+            width = height = image_processor_tester.max_resolution
+        else:
+            # To avoid getting image width/height 0
+            min_resolution = image_processor_tester.min_resolution
+            if getattr(image_processor_tester, "size_divisor", None):
+                # If `size_divisor` is defined, the image needs to have width/size >= `size_divisor`
+                min_resolution = max(image_processor_tester.size_divisor, min_resolution)
+            width, height = np.random.choice(np.arange(min_resolution, image_processor_tester.max_resolution), 2)
+        image_inputs.append(
+            np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8)
+        )
+
+    if not numpify and not torchify:
+        # PIL expects the channel dimension as last dimension
+        image_inputs = [Image.fromarray(np.moveaxis(image, 0, -1)) for image in image_inputs]
+
+    if torchify:
+        image_inputs = [torch.from_numpy(image) for image in image_inputs]
+
+    return image_inputs
+
+
+def prepare_video(image_processor_tester, width=10, height=10, numpify=False, torchify=False):
+    """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors."""
+
+    video = []
+    for i in range(image_processor_tester.num_frames):
+        video.append(np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8))
+
+    if not numpify and not torchify:
+        # PIL expects the channel dimension as last dimension
+        video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video]
+
+    if torchify:
+        video = [torch.from_numpy(frame) for frame in video]
+
+    return video
+
+
+def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False):
+    """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if
+    one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True.
+
+    One can specify whether the videos are of the same resolution or not.
+ """ + + assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time" + + video_inputs = [] + for i in range(image_processor_tester.batch_size): + if equal_resolution: + width = height = image_processor_tester.max_resolution + else: + width, height = np.random.choice( + np.arange(image_processor_tester.min_resolution, image_processor_tester.max_resolution), 2 + ) + video = prepare_video( + image_processor_tester=image_processor_tester, + width=width, + height=height, + numpify=numpify, + torchify=torchify, + ) + video_inputs.append(video) + + return video_inputs + + +class ImageProcessingSavingTestMixin: + test_cast_dtype = None + + def test_image_processor_to_json_string(self): + image_processor = self.image_processing_class(**self.image_processor_dict) + obj = json.loads(image_processor.to_json_string()) + for key, value in self.image_processor_dict.items(): + self.assertEqual(obj[key], value) + + def test_image_processor_to_json_file(self): + image_processor_first = self.image_processing_class(**self.image_processor_dict) + + with tempfile.TemporaryDirectory() as tmpdirname: + json_file_path = os.path.join(tmpdirname, "image_processor.json") + image_processor_first.to_json_file(json_file_path) + image_processor_second = self.image_processing_class.from_json_file(json_file_path) + + self.assertEqual(image_processor_second.to_dict(), image_processor_first.to_dict()) + + def test_image_processor_from_and_save_pretrained(self): + image_processor_first = self.image_processing_class(**self.image_processor_dict) + + with tempfile.TemporaryDirectory() as tmpdirname: + saved_file = image_processor_first.save_pretrained(tmpdirname)[0] + check_json_file_has_correct_format(saved_file) + image_processor_second = self.image_processing_class.from_pretrained(tmpdirname) + + self.assertEqual(image_processor_second.to_dict(), image_processor_first.to_dict()) + + def test_init_without_params(self): + image_processor = self.image_processing_class() + self.assertIsNotNone(image_processor) + + @require_torch + @require_vision + def test_cast_dtype_device(self): + if self.test_cast_dtype is not None: + # Initialize image_processor + image_processor = self.image_processing_class(**self.image_processor_dict) + + # create random PyTorch tensors + image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True) + + encoding = image_processor(image_inputs, return_tensors="pt") + # for layoutLM compatiblity + self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) + self.assertEqual(encoding.pixel_values.dtype, torch.float32) + + encoding = image_processor(image_inputs, return_tensors="pt").to(torch.float16) + self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) + self.assertEqual(encoding.pixel_values.dtype, torch.float16) + + encoding = image_processor(image_inputs, return_tensors="pt").to("cpu", torch.bfloat16) + self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) + self.assertEqual(encoding.pixel_values.dtype, torch.bfloat16) + + with self.assertRaises(TypeError): + _ = image_processor(image_inputs, return_tensors="pt").to(torch.bfloat16, "cpu") + + # Try with text + image feature + encoding = image_processor(image_inputs, return_tensors="pt") + encoding.update({"input_ids": torch.LongTensor([[1, 2, 3], [4, 5, 6]])}) + encoding = encoding.to(torch.float16) + + self.assertEqual(encoding.pixel_values.device, torch.device("cpu")) + self.assertEqual(encoding.pixel_values.dtype, 
torch.float16) + self.assertEqual(encoding.input_ids.dtype, torch.long) + + +class ImageProcessorUtilTester(unittest.TestCase): + def test_cached_files_are_used_when_internet_is_down(self): + # A mock response for an HTTP head request to emulate server down + response_mock = mock.Mock() + response_mock.status_code = 500 + response_mock.headers = {} + response_mock.raise_for_status.side_effect = HTTPError + response_mock.json.return_value = {} + + # Download this model to make sure it's in the cache. + _ = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit") + # Under the mock environment we get a 500 error when trying to reach the model. + with mock.patch("requests.request", return_value=response_mock) as mock_head: + _ = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit") + # This check we did call the fake head request + mock_head.assert_called() + + def test_legacy_load_from_url(self): + # This test is for deprecated behavior and can be removed in v5 + _ = ViTImageProcessor.from_pretrained( + "https://huggingface.co/hf-internal-testing/tiny-random-vit/resolve/main/preprocessor_config.json" + ) + + +@is_staging_test +class ImageProcessorPushToHubTester(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._token = TOKEN + set_access_token(TOKEN) + HfFolder.save_token(TOKEN) + + @classmethod + def tearDownClass(cls): + try: + delete_repo(token=cls._token, repo_id="test-image-processor") + except HTTPError: + pass + + try: + delete_repo(token=cls._token, repo_id="valid_org/test-image-processor-org") + except HTTPError: + pass + + try: + delete_repo(token=cls._token, repo_id="test-dynamic-image-processor") + except HTTPError: + pass + + def test_push_to_hub(self): + image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + image_processor.push_to_hub("test-image-processor", use_auth_token=self._token) + + new_image_processor = ViTImageProcessor.from_pretrained(f"{USER}/test-image-processor") + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) + + # Reset repo + delete_repo(token=self._token, repo_id="test-image-processor") + + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + image_processor.save_pretrained( + tmp_dir, repo_id="test-image-processor", push_to_hub=True, use_auth_token=self._token + ) + + new_image_processor = ViTImageProcessor.from_pretrained(f"{USER}/test-image-processor") + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) + + def test_push_to_hub_in_organization(self): + image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + image_processor.push_to_hub("valid_org/test-image-processor", use_auth_token=self._token) + + new_image_processor = ViTImageProcessor.from_pretrained("valid_org/test-image-processor") + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) + + # Reset repo + delete_repo(token=self._token, repo_id="valid_org/test-image-processor") + + # Push to hub via save_pretrained + with tempfile.TemporaryDirectory() as tmp_dir: + image_processor.save_pretrained( + tmp_dir, repo_id="valid_org/test-image-processor-org", push_to_hub=True, use_auth_token=self._token + ) + + new_image_processor = ViTImageProcessor.from_pretrained("valid_org/test-image-processor-org") + for k, v in image_processor.__dict__.items(): + self.assertEqual(v, getattr(new_image_processor, k)) + + 
def test_push_to_hub_dynamic_image_processor(self): + CustomImageProcessor.register_for_auto_class() + image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR) + + image_processor.push_to_hub("test-dynamic-image-processor", use_auth_token=self._token) + + # This has added the proper auto_map field to the config + self.assertDictEqual( + image_processor.auto_map, + {"ImageProcessor": "custom_image_processing.CustomImageProcessor"}, + ) + + new_image_processor = AutoImageProcessor.from_pretrained( + f"{USER}/test-dynamic-image-processor", trust_remote_code=True + ) + # Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module + self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor") diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 82501d98bc6..d388c11361e 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -353,6 +353,7 @@ SPECIAL_MODULE_TO_TEST_MAP = { "feature_extraction_sequence_utils.py": "test_sequence_feature_extraction_common.py", "feature_extraction_utils.py": "test_feature_extraction_common.py", "file_utils.py": ["utils/test_file_utils.py", "utils/test_model_output.py"], + "image_processing_utils.py": ["test_image_processing_common.py", "utils/test_image_processing_utils.py"], "image_transforms.py": "test_image_transforms.py", "utils/generic.py": ["utils/test_file_utils.py", "utils/test_model_output.py", "utils/test_generic.py"], "utils/hub.py": "utils/test_hub_utils.py",