From 1c81132e80478e278681686fe44dfec793d5dee9 Mon Sep 17 00:00:00 2001
From: Pablo Montalvo <39954772+molbap@users.noreply.github.com>
Date: Tue, 20 Feb 2024 16:20:20 +0100
Subject: [PATCH] Raise unused kwargs image processor (#29063)

* draft processor arg capture

* add missing vivit model

* add new common test for image preprocess signature

* fix quality

* fix up

* add back missing validations

* quality

* move info level to warning for unused kwargs

---
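Note: with this change, each image processor declares its accepted `preprocess()`
arguments in `self._valid_processor_keys` and warns about any kwarg it does not
recognize, instead of silently ignoring it. A minimal sketch of the new behavior
(the checkpoint name is only illustrative):

    from transformers import ViTImageProcessor
    import numpy as np

    processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
    image = np.zeros((224, 224, 3), dtype=np.uint8)
    # "reduce_labels" is not a ViT preprocess argument, so this call now logs
    # "Unused or unrecognized kwargs: reduce_labels." and otherwise proceeds.
    processor.preprocess(image, reduce_labels=True)
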
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " "the latter being a list of annotations in the COCO format." ) + + +def validate_kwargs(valid_processor_keys: List[str], captured_kwargs: List[str]): + unused_keys = set(captured_kwargs).difference(set(valid_processor_keys)) + if unused_keys: + unused_key_str = ", ".join(unused_keys) + # TODO raise a warning here instead of simply logging? + logger.warning(f"Unused or unrecognized kwargs: {unused_key_str}.") diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index 52c1a813f60..5e15fe645cf 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -32,6 +32,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -130,6 +131,24 @@ class BeitImageProcessor(BaseImageProcessor): self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.do_reduce_labels = do_reduce_labels + self._valid_processor_keys = [ + "images", + "segmentation_maps", + "do_resize", + "size", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_reduce_labels", + "return_tensors", + "data_format", + "input_data_format", + ] @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -337,6 +356,9 @@ class BeitImageProcessor(BaseImageProcessor): images (`ImageInput`): Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. + segmentation_maps (`ImageInput`, *optional*) + Segmentation maps to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. 
             size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -396,6 +418,8 @@ class BeitImageProcessor(BaseImageProcessor):
         image_std = image_std if image_std is not None else self.image_std
         do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         images = make_list_of_images(images)
 
         if segmentation_maps is not None:
diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py
index df9336c3479..c9d5c7a7594 100644
--- a/src/transformers/models/bit/image_processing_bit.py
+++ b/src/transformers/models/bit/image_processing_bit.py
@@ -36,6 +36,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -121,6 +122,23 @@ class BitImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
         self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
         self.do_convert_rgb = do_convert_rgb
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_convert_rgb",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize
     def resize(
@@ -256,6 +274,8 @@ class BitImageProcessor(BaseImageProcessor):
         image_std = image_std if image_std is not None else self.image_std
         do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         images = make_list_of_images(images)
 
         if not valid_images(images):
diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py
index fa65624937f..a65ccc2d983 100644
--- a/src/transformers/models/blip/image_processing_blip.py
+++ b/src/transformers/models/blip/image_processing_blip.py
@@ -31,6 +31,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -106,6 +107,21 @@ class BlipImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
         self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
         self.do_convert_rgb = do_convert_rgb
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_convert_rgb",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.BICUBIC
     def resize(
@@ -234,6 +250,8 @@ class BlipImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py
index 3053c72a4c5..8fc62ad3970 100644
--- a/src/transformers/models/bridgetower/image_processing_bridgetower.py
+++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py
@@ -32,6 +32,7 @@ from ...image_utils import (
     is_scaled_image,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -204,6 +205,24 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
         self.do_pad = do_pad
         self.do_center_crop = do_center_crop
         self.crop_size = crop_size
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "size_divisor",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_pad",
+            "do_center_crop",
+            "crop_size",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize
     def resize(
@@ -465,6 +484,8 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
         size = size if size is not None else self.size
         size = get_size_dict(size, default_to_square=False)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not is_batched(images):
             images = [images]
diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
index 0216bc5431e..60f40272bf9 100644
--- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
+++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
@@ -36,6 +36,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -121,6 +122,23 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
         self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
         self.do_convert_rgb = do_convert_rgb
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_convert_rgb",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -247,6 +265,8 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py
index 6549a572d86..fd2f8b3d532 100644
--- a/src/transformers/models/clip/image_processing_clip.py
+++ b/src/transformers/models/clip/image_processing_clip.py
@@ -36,6 +36,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -121,6 +122,23 @@ class CLIPImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
         self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
         self.do_convert_rgb = do_convert_rgb
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_convert_rgb",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
         # for backwards compatibility of KOSMOS-2
         if "use_square_size" in kwargs:
@@ -259,6 +277,8 @@ class CLIPImageProcessor(BaseImageProcessor):
         image_std = image_std if image_std is not None else self.image_std
         do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         images = make_list_of_images(images)
 
         if not valid_images(images):
diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
index 0af79bbcb93..1a473fb841a 100644
--- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
+++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
@@ -49,6 +49,7 @@ from ...image_utils import (
     to_numpy_array,
     valid_images,
     validate_annotations,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -845,6 +846,26 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
         self.do_pad = do_pad
+        self._valid_processor_keys = [
+            "images",
+            "annotations",
+            "return_segmentation_masks",
+            "masks_path",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "do_convert_annotations",
+            "image_mean",
+            "image_std",
+            "do_pad",
+            "format",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     @classmethod
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->ConditionalDetr
@@ -1299,6 +1320,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
 
         # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py
index 6d6476e7721..54060105f59 100644
--- a/src/transformers/models/convnext/image_processing_convnext.py
+++ b/src/transformers/models/convnext/image_processing_convnext.py
@@ -36,6 +36,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -113,6 +114,21 @@ class ConvNextImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "crop_pct",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -260,6 +276,8 @@ class ConvNextImageProcessor(BaseImageProcessor):
         size = size if size is not None else self.size
         size = get_size_dict(size, default_to_square=False)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         images = make_list_of_images(images)
 
         if not valid_images(images):
diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
index ef4dc7f3e57..cd3ac90a47a 100644
--- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
+++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
@@ -49,6 +49,7 @@ from ...image_utils import (
     to_numpy_array,
     valid_images,
     validate_annotations,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -843,6 +844,26 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
         self.do_pad = do_pad
+        self._valid_processor_keys = [
+            "images",
+            "annotations",
+            "return_segmentation_masks",
+            "masks_path",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "do_convert_annotations",
+            "image_mean",
+            "image_std",
+            "do_pad",
+            "format",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     @classmethod
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->DeformableDetr
@@ -1297,6 +1318,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
 
         # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py
index 15e820570c0..2a8ebb36377 100644
--- a/src/transformers/models/deit/image_processing_deit.py
+++ b/src/transformers/models/deit/image_processing_deit.py
@@ -31,6 +31,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -109,6 +110,22 @@ class DeiTImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.BICUBIC
     def resize(
@@ -240,6 +257,8 @@ class DeiTImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py
index 0a7a6e2dbd5..71768a8e7b0 100644
--- a/src/transformers/models/detr/image_processing_detr.py
+++ b/src/transformers/models/detr/image_processing_detr.py
@@ -48,6 +48,7 @@ from ...image_utils import (
     to_numpy_array,
     valid_images,
     validate_annotations,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -828,6 +829,26 @@ class DetrImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
         self.do_pad = do_pad
+        self._valid_processor_keys = [
+            "images",
+            "annotations",
+            "return_segmentation_masks",
+            "masks_path",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "do_convert_annotations",
+            "image_mean",
+            "image_std",
+            "do_pad",
+            "format",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     @classmethod
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -1269,6 +1290,7 @@ class DetrImageProcessor(BaseImageProcessor):
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
 
         # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py
index a1759331624..1c6e4723139 100644
--- a/src/transformers/models/donut/image_processing_donut.py
+++ b/src/transformers/models/donut/image_processing_donut.py
@@ -37,6 +37,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
@@ -123,6 +124,24 @@ class DonutImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_thumbnail",
+            "do_align_long_axis",
+            "do_pad",
+            "random_padding",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def align_long_axis(
         self,
@@ -388,6 +407,8 @@ class DonutImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py
index 29aac9d005b..96f43a796e3 100644
--- a/src/transformers/models/dpt/image_processing_dpt.py
+++ b/src/transformers/models/dpt/image_processing_dpt.py
@@ -35,6 +35,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -164,6 +165,24 @@ class DPTImageProcessor(BaseImageProcessor):
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
         self.do_pad = do_pad
         self.size_divisor = size_divisor
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "keep_aspect_ratio",
+            "ensure_multiple_of",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_pad",
+            "size_divisor",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -350,6 +369,8 @@ class DPTImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/efficientformer/image_processing_efficientformer.py b/src/transformers/models/efficientformer/image_processing_efficientformer.py
index 7db37c20b7f..38756f7c958 100644
--- a/src/transformers/models/efficientformer/image_processing_efficientformer.py
+++ b/src/transformers/models/efficientformer/image_processing_efficientformer.py
@@ -35,6 +35,7 @@ from ...image_utils import (
     is_scaled_image,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
@@ -112,6 +113,22 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
         self.rescale_factor = rescale_factor
         self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -238,6 +255,8 @@ class EfficientFormerImageProcessor(BaseImageProcessor):
         size = size if size is not None else self.size
         size_dict = get_size_dict(size)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not is_batched(images):
             images = [images]
diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py
index ee4690e0fb9..4fd2364a302 100644
--- a/src/transformers/models/efficientnet/image_processing_efficientnet.py
+++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py
@@ -31,6 +31,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -118,6 +119,24 @@ class EfficientNetImageProcessor(BaseImageProcessor):
         self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
         self.include_top = include_top
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "rescale_offset",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "include_top",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.NEAREST
    def resize(
@@ -297,6 +316,8 @@ class EfficientNetImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py
index 168e3e8e2e3..d6a7c8080bb 100644
--- a/src/transformers/models/flava/image_processing_flava.py
+++ b/src/transformers/models/flava/image_processing_flava.py
@@ -34,6 +34,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -301,6 +302,41 @@ class FlavaImageProcessor(BaseImageProcessor):
         self.codebook_image_mean = codebook_image_mean
         self.codebook_image_mean = codebook_image_mean if codebook_image_mean is not None else FLAVA_CODEBOOK_MEAN
         self.codebook_image_std = codebook_image_std if codebook_image_std is not None else FLAVA_CODEBOOK_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_image_mask",
+            "input_size_patches",
+            "total_mask_patches",
+            "mask_group_min_patches",
+            "mask_group_max_patches",
+            "mask_group_min_aspect_ratio",
+            "mask_group_max_aspect_ratio",
+            "return_codebook_pixels",
+            "codebook_do_resize",
+            "codebook_size",
+            "codebook_resample",
+            "codebook_do_center_crop",
+            "codebook_crop_size",
+            "codebook_do_rescale",
+            "codebook_rescale_factor",
+            "codebook_do_map_pixels",
+            "codebook_do_normalize",
+            "codebook_image_mean",
+            "codebook_image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     @classmethod
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -636,6 +672,8 @@ class FlavaImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py
index 70ff3e725d2..ec5e1a36abb 100644
--- a/src/transformers/models/fuyu/image_processing_fuyu.py
+++ b/src/transformers/models/fuyu/image_processing_fuyu.py
@@ -261,6 +261,24 @@ class FuyuImageProcessor(BaseImageProcessor):
         self.do_rescale = do_rescale
         self.rescale_factor = rescale_factor
         self.patch_size = patch_size if patch_size is not None else {"height": 30, "width": 30}
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_pad",
+            "padding_value",
+            "padding_mode",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "do_rescale",
+            "rescale_factor",
+            "patch_size",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
diff --git a/src/transformers/models/glpn/image_processing_glpn.py b/src/transformers/models/glpn/image_processing_glpn.py
index 2be3e3c90b3..7577b4eeb3d 100644
--- a/src/transformers/models/glpn/image_processing_glpn.py
+++ b/src/transformers/models/glpn/image_processing_glpn.py
@@ -30,6 +30,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
@@ -71,6 +72,16 @@ class GLPNImageProcessor(BaseImageProcessor):
         self.size_divisor = size_divisor
         self.resample = resample
         super().__init__(**kwargs)
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size_divisor",
+            "resample",
+            "do_rescale",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -176,6 +187,8 @@ class GLPNImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py
index d85803a5a61..fecdd061d4e 100644
--- a/src/transformers/models/imagegpt/image_processing_imagegpt.py
+++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py
@@ -29,6 +29,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -102,6 +103,18 @@ class ImageGPTImageProcessor(BaseImageProcessor):
         self.resample = resample
         self.do_normalize = do_normalize
         self.do_color_quantize = do_color_quantize
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_normalize",
+            "do_color_quantize",
+            "clusters",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
     def resize(
@@ -238,6 +251,8 @@ class ImageGPTImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py
index a56cb8dd10a..e2369911941 100644
--- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py
@@ -28,6 +28,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends
@@ -137,6 +138,18 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor):
         self.apply_ocr = apply_ocr
         self.ocr_lang = ocr_lang
         self.tesseract_config = tesseract_config
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "apply_ocr",
+            "ocr_lang",
+            "tesseract_config",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
     def resize(
@@ -244,6 +257,8 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
index c2461ad60da..8c5356993f1 100644
--- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
@@ -31,6 +31,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends
@@ -164,6 +165,23 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
         self.apply_ocr = apply_ocr
         self.ocr_lang = ocr_lang
         self.tesseract_config = tesseract_config
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "apply_ocr",
+            "ocr_lang",
+            "tesseract_config",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
     def resize(
@@ -298,6 +316,8 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
         tesseract_config = tesseract_config if tesseract_config is not None else self.tesseract_config
 
         images = make_list_of_images(images)
+
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
 
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py
index a21e5750c70..b861a4ebf8b 100644
--- a/src/transformers/models/levit/image_processing_levit.py
+++ b/src/transformers/models/levit/image_processing_levit.py
@@ -35,6 +35,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
@@ -115,6 +116,22 @@ class LevitImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -254,6 +271,8 @@ class LevitImageProcessor(BaseImageProcessor):
         crop_size = get_size_dict(crop_size, param_name="crop_size")
 
         images = make_list_of_images(images)
+
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
 
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py
index 154a531c8b0..5440584d25f 100644
--- a/src/transformers/models/mask2former/image_processing_mask2former.py
+++ b/src/transformers/models/mask2former/image_processing_mask2former.py
@@ -39,6 +39,7 @@ from ...image_utils import (
     is_scaled_image,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -439,6 +440,25 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
         self.ignore_index = ignore_index
         self.reduce_labels = reduce_labels
+        self._valid_processor_keys = [
+            "images",
+            "segmentation_maps",
+            "instance_id_to_semantic_id",
+            "do_resize",
+            "size",
+            "size_divisor",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "ignore_index",
+            "reduce_labels",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     @classmethod
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -708,6 +728,8 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
         ignore_index = ignore_index if ignore_index is not None else self.ignore_index
         reduce_labels = reduce_labels if reduce_labels is not None else self.reduce_labels
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py
index a5d940c6531..3c854b35c76 100644
--- a/src/transformers/models/maskformer/image_processing_maskformer.py
+++ b/src/transformers/models/maskformer/image_processing_maskformer.py
@@ -39,6 +39,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -448,6 +449,25 @@ class MaskFormerImageProcessor(BaseImageProcessor):
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
         self.ignore_index = ignore_index
         self.do_reduce_labels = do_reduce_labels
+        self._valid_processor_keys = [
+            "images",
+            "segmentation_maps",
+            "instance_id_to_semantic_id",
+            "do_resize",
+            "size",
+            "size_divisor",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "ignore_index",
+            "do_reduce_labels",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     @classmethod
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -730,6 +750,8 @@ class MaskFormerImageProcessor(BaseImageProcessor):
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         validate_preprocess_arguments(
             do_rescale=do_rescale,
             rescale_factor=rescale_factor,
diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
index 9f59c17d1d5..086ab892492 100644
--- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
+++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
@@ -35,6 +35,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
@@ -113,6 +114,22 @@ class MobileNetV1ImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize
     def resize(
@@ -245,6 +262,8 @@ class MobileNetV1ImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
index dcf82e8d168..44b784d2a7c 100644
--- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
+++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
@@ -35,6 +35,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_torch_available, is_torch_tensor, logging
@@ -117,6 +118,22 @@ class MobileNetV2ImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize
     def resize(
@@ -249,6 +266,8 @@ class MobileNetV2ImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py
index 32bbf3d5d36..8cc79a283e0 100644
--- a/src/transformers/models/mobilevit/image_processing_mobilevit.py
+++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py
@@ -29,6 +29,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging
@@ -104,6 +105,21 @@ class MobileViTImageProcessor(BaseImageProcessor):
         self.do_center_crop = do_center_crop
         self.crop_size = crop_size
         self.do_flip_channel_order = do_flip_channel_order
+        self._valid_processor_keys = [
+            "images",
+            "segmentation_maps",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_center_crop",
+            "crop_size",
+            "do_flip_channel_order",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     # Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize with PILImageResampling.BICUBIC->PILImageResampling.BILINEAR
     def resize(
@@ -366,6 +382,9 @@ class MobileViTImageProcessor(BaseImageProcessor):
         crop_size = get_size_dict(crop_size, param_name="crop_size")
 
         images = make_list_of_images(images)
+
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if segmentation_maps is not None:
             segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2)
diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py
index 448c9f21c4a..49913d5baa0 100644
--- a/src/transformers/models/nougat/image_processing_nougat.py
+++ b/src/transformers/models/nougat/image_processing_nougat.py
@@ -38,6 +38,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
@@ -125,6 +126,24 @@ class NougatImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_crop_margin",
+            "do_resize",
+            "size",
+            "resample",
+            "do_thumbnail",
+            "do_align_long_axis",
+            "do_pad",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def python_find_non_zero(self, image: np.array):
         """This is a reimplementation of a findNonZero function equivalent to cv2."""
@@ -442,6 +461,8 @@ class NougatImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py
index 23b3fa69569..d9b0c016868 100644
--- a/src/transformers/models/oneformer/image_processing_oneformer.py
+++ b/src/transformers/models/oneformer/image_processing_oneformer.py
@@ -42,6 +42,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -467,6 +468,25 @@ class OneFormerImageProcessor(BaseImageProcessor):
         self.repo_path = repo_path
         self.metadata = prepare_metadata(load_metadata(repo_path, class_info_file))
         self.num_text = num_text
+        self._valid_processor_keys = [
+            "images",
+            "task_inputs",
+            "segmentation_maps",
+            "instance_id_to_semantic_id",
+            "do_resize",
+            "size",
+            "resample",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "ignore_index",
+            "do_reduce_labels",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -714,6 +734,9 @@ class OneFormerImageProcessor(BaseImageProcessor):
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )
+
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         validate_preprocess_arguments(
             do_rescale=do_rescale,
             rescale_factor=rescale_factor,
diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py
index 21f09060cd0..06ac984c7d8 100644
--- a/src/transformers/models/owlv2/image_processing_owlv2.py
+++ b/src/transformers/models/owlv2/image_processing_owlv2.py
@@ -37,6 +37,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import (
@@ -232,6 +233,20 @@ class Owlv2ImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
         self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_pad",
+            "do_resize",
+            "size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def pad(
         self,
@@ -401,6 +416,8 @@ class Owlv2ImageProcessor(BaseImageProcessor):
 
         images = make_list_of_images(images)
 
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+
         if not valid_images(images):
             raise ValueError(
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
diff --git a/src/transformers/models/owlvit/image_processing_owlvit.py b/src/transformers/models/owlvit/image_processing_owlvit.py
index 961707725db..25ea5f2720d 100644
--- a/src/transformers/models/owlvit/image_processing_owlvit.py
+++ b/src/transformers/models/owlvit/image_processing_owlvit.py
@@ -38,6 +38,7 @@ from ...image_utils import (
     make_list_of_images,
     to_numpy_array,
     valid_images,
+    validate_kwargs,
     validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_torch_available, logging
@@ -166,6 +167,22 @@ class OwlViTImageProcessor(BaseImageProcessor):
         self.do_normalize = do_normalize
         self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
         self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
+        self._valid_processor_keys = [
+            "images",
+            "do_resize",
+            "size",
+            "resample",
+            "do_center_crop",
+            "crop_size",
+            "do_rescale",
+            "rescale_factor",
+            "do_normalize",
+            "image_mean",
+            "image_std",
+            "return_tensors",
+            "data_format",
+            "input_data_format",
+        ]
 
     def resize(
         self,
@@ -356,6 +373,7 @@ class OwlViTImageProcessor(BaseImageProcessor):
                 "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) validate_preprocess_arguments( do_rescale=do_rescale, diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 599e48d77a0..02dd527e437 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -32,6 +32,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -113,6 +114,22 @@ class PerceiverImageProcessor(BaseImageProcessor): self.do_normalize = do_normalize self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD + self._valid_processor_keys = [ + "images", + "do_center_crop", + "crop_size", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def center_crop( self, @@ -286,6 +303,8 @@ class PerceiverImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index dab7392fbb0..dcdb1591b1c 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -35,6 +35,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -132,6 +133,23 @@ class PoolFormerImageProcessor(BaseImageProcessor): self.do_normalize = do_normalize self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD + self._valid_processor_keys = [ + "images", + "do_resize", + "size", + "crop_pct", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def resize( self, @@ -293,6 +311,8 @@ class PoolFormerImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index ada7eaec4aa..f3907edf3af 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -31,6 +31,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -95,6 +96,20 @@ class PvtImageProcessor(BaseImageProcessor): self.rescale_factor = rescale_factor self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD + self._valid_processor_keys = [ + "images", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize def resize( @@ -218,6 +233,8 @@ class PvtImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index 911e3fd0ff5..ccdc72fc7ba 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -34,6 +34,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import ( @@ -160,6 +161,26 @@ class SamImageProcessor(BaseImageProcessor): self.pad_size = pad_size self.mask_pad_size = mask_pad_size self.do_convert_rgb = do_convert_rgb + self._valid_processor_keys = [ + "images", + "segmentation_maps", + "do_resize", + "size", + "mask_size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_pad", + "pad_size", + "mask_pad_size", + "do_convert_rgb", + "return_tensors", + "data_format", + "input_data_format", + ] def pad_image( self, @@ -491,6 +512,8 @@ class SamImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index ff12108a301..1fef9645549 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -32,6 +32,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -118,6 +119,22 @@ class SegformerImageProcessor(BaseImageProcessor): self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.do_reduce_labels = do_reduce_labels + self._valid_processor_keys = [ + "images", + "segmentation_maps", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_reduce_labels", + "return_tensors", + "data_format", + "input_data_format", + ] @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -380,6 +397,9 @@ class SegformerImageProcessor(BaseImageProcessor): image_std = image_std if image_std is not None else self.image_std images = make_list_of_images(images) + + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index 7796a6e3d29..5f24ffb0a2a 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -32,6 +32,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -101,6 +102,20 @@ class SiglipImageProcessor(BaseImageProcessor): self.do_normalize = do_normalize self.image_mean = image_mean self.image_std = image_std + self._valid_processor_keys = [ + "images", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def preprocess( self, @@ -174,6 +189,8 @@ class SiglipImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/swin2sr/image_processing_swin2sr.py b/src/transformers/models/swin2sr/image_processing_swin2sr.py index d86b1e28e8d..a126e6eee5e 100644 --- a/src/transformers/models/swin2sr/image_processing_swin2sr.py +++ b/src/transformers/models/swin2sr/image_processing_swin2sr.py @@ -28,6 +28,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -65,6 +66,16 @@ class Swin2SRImageProcessor(BaseImageProcessor): self.rescale_factor = rescale_factor self.do_pad = do_pad self.pad_size = pad_size + self._valid_processor_keys = [ + "images", + "do_rescale", + "rescale_factor", + "do_pad", + "pad_size", + "return_tensors", + "data_format", + "input_data_format", + ] def pad( self, @@ -161,6 +172,8 @@ class Swin2SRImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/tvlt/image_processing_tvlt.py b/src/transformers/models/tvlt/image_processing_tvlt.py index 618dcf08904..f13101c15a9 100644 --- a/src/transformers/models/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/tvlt/image_processing_tvlt.py @@ -34,6 +34,7 @@ from ...image_utils import ( is_valid_image, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -151,6 +152,25 @@ class TvltImageProcessor(BaseImageProcessor): self.do_normalize = do_normalize self.image_mean = image_mean self.image_std = image_std + self._valid_processor_keys = [ + "videos", + "do_resize", + "size", + "patch_size", + "num_frames", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "is_mixed", + "return_tensors", + "data_format", + "input_data_format", + ] def resize( self, @@ -357,6 +377,8 @@ class TvltImageProcessor(BaseImageProcessor): patch_size = patch_size if patch_size is not None else self.patch_size - num_frames = num_frames if patch_size is not None else self.num_frames + num_frames = num_frames if num_frames is not None else self.num_frames + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(videos): raise ValueError( "Invalid image or video type.
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index b14e2ce264f..18600ee5fbe 100644 --- a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -36,6 +36,7 @@ from ...image_utils import ( is_valid_image, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -172,6 +173,27 @@ class TvpImageProcessor(BaseImageProcessor): self.do_flip_channel_order = do_flip_channel_order self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self._valid_processor_keys = [ + "videos", + "do_resize", + "size", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "do_pad", + "pad_size", + "constant_values", + "pad_mode", + "do_normalize", + "do_flip_channel_order", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def resize( self, @@ -437,6 +459,8 @@ class TvpImageProcessor(BaseImageProcessor): crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(videos): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index dc69a57f59b..6563d69c650 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -35,6 +35,7 @@ from ...image_utils import ( is_valid_image, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -130,6 +131,22 @@ class VideoMAEImageProcessor(BaseImageProcessor): self.do_normalize = do_normalize self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self._valid_processor_keys = [ + "videos", + "do_resize", + "size", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def resize( self, @@ -311,6 +328,8 @@ class VideoMAEImageProcessor(BaseImageProcessor): crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(videos): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index aee5e298630..42e5b3f439d 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -32,6 +32,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -191,6 +192,22 @@ class ViltImageProcessor(BaseImageProcessor): self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.do_pad = do_pad + self._valid_processor_keys = [ + "images", + "do_resize", + "size", + "size_divisor", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_pad", + "return_tensors", + "data_format", + "input_data_format", + ] @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -416,6 +433,8 @@ class ViltImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 63886bef4ca..4c7d8de714f 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -31,6 +31,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -95,6 +96,20 @@ class ViTImageProcessor(BaseImageProcessor): self.rescale_factor = rescale_factor self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self._valid_processor_keys = [ + "images", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def resize( self, @@ -217,6 +232,8 @@ class ViTImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py index cf27d204456..4bb3f70b49b 100644 --- a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py @@ -36,6 +36,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -121,6 +122,23 @@ class ViTHybridImageProcessor(BaseImageProcessor): self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD self.do_convert_rgb = do_convert_rgb + self._valid_processor_keys = [ + "images", + "do_resize", + "size", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_convert_rgb", + "return_tensors", + "data_format", + "input_data_format", + ] # Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize def resize( @@ -258,6 +276,8 @@ class ViTHybridImageProcessor(BaseImageProcessor): images = make_list_of_images(images) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(images): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vitmatte/image_processing_vitmatte.py b/src/transformers/models/vitmatte/image_processing_vitmatte.py index fe7767e8e13..d7310bc0dd2 100644 --- a/src/transformers/models/vitmatte/image_processing_vitmatte.py +++ b/src/transformers/models/vitmatte/image_processing_vitmatte.py @@ -31,6 +31,7 @@ from ...image_utils import ( make_list_of_images, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -87,6 +88,20 @@ class VitMatteImageProcessor(BaseImageProcessor): self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD self.size_divisibility = size_divisibility + self._valid_processor_keys = [ + "images", + "trimaps", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_pad", + "size_divisibility", + "return_tensors", + "data_format", + "input_data_format", + ] def pad_image( self, @@ -198,14 +213,14 @@ class VitMatteImageProcessor(BaseImageProcessor): images = make_list_of_images(images) trimaps = make_list_of_images(trimaps, expected_ndims=2) + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(trimaps): raise ValueError( "Invalid trimap type. Must be of type PIL.Image.Image, numpy.ndarray, " "torch.Tensor, tf.Tensor or jax.ndarray." ) - images = make_list_of_images(images) - if not valid_images(images): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 664ba6d7098..9b62aedc234 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -38,6 +38,7 @@ from ...image_utils import ( is_valid_image, to_numpy_array, valid_images, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import logging @@ -137,6 +138,23 @@ class VivitImageProcessor(BaseImageProcessor): self.do_normalize = do_normalize self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self._valid_processor_keys = [ + "videos", + "do_resize", + "size", + "resample", + "do_center_crop", + "crop_size", + "do_rescale", + "rescale_factor", + "offset", + "do_normalize", + "image_mean", + "image_std", + "return_tensors", + "data_format", + "input_data_format", + ] def resize( self, @@ -368,6 +386,8 @@ class VivitImageProcessor(BaseImageProcessor): crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + if not valid_images(videos): raise ValueError( "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index 6ae30d50a1b..f77e27ec40d 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -47,6 +47,7 @@ from ...image_utils import ( to_numpy_array, valid_images, validate_annotations, + validate_kwargs, validate_preprocess_arguments, ) from ...utils import ( @@ -750,6 +751,26 @@ class YolosImageProcessor(BaseImageProcessor): self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.do_pad = do_pad + self._valid_processor_keys = [ + "images", + "annotations", + "return_segmentation_masks", + "masks_path", + "do_resize", + "size", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_convert_annotations", + "do_pad", + "format", + "return_tensors", + "data_format", + "input_data_format", + ] @classmethod # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->Yolos @@ -1185,6 +1206,7 @@ class YolosImageProcessor(BaseImageProcessor): ) do_pad = self.do_pad if do_pad is None else do_pad format = self.format if format is None else format + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) images = make_list_of_images(images) diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py index dcbee270f90..90c1a4e7e12 100644 --- a/tests/test_image_processing_common.py +++ b/tests/test_image_processing_common.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import inspect import json import os import pathlib @@ -289,6 +290,16 @@ class ImageProcessingTestMixin: tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) ) + def test_image_processor_preprocess_arguments(self): + image_processor = self.image_processing_class(**self.image_processor_dict) + if hasattr(image_processor, "_valid_processor_keys") and hasattr(image_processor, "preprocess"): + preprocess_parameter_names = inspect.getfullargspec(image_processor.preprocess).args + preprocess_parameter_names.remove("self") + preprocess_parameter_names.sort() + valid_processor_keys = list(image_processor._valid_processor_keys) + valid_processor_keys.sort() + self.assertEqual(preprocess_parameter_names, valid_processor_keys) + class AnnotationFormatTestMixin: # this mixin adds a test to assert that usages of the
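
A minimal usage sketch of the behavior this patch adds, assuming the patch is applied. The checkpoint-free constructor, the dummy zero-valued image, and the choice of `crop_size` as the stray kwarg are all illustrative; `crop_size` is simply a name that `ViTImageProcessor.preprocess` does not declare, so `validate_kwargs` logs it as unused while preprocessing still succeeds.

import numpy as np

from transformers import ViTImageProcessor

processor = ViTImageProcessor()  # all-default ViT image processor
image = np.zeros((224, 224, 3), dtype=np.uint8)  # dummy RGB image

# "crop_size" is not in ViTImageProcessor._valid_processor_keys, so the call
# completes normally but logs: "Unused or unrecognized kwargs: crop_size."
outputs = processor.preprocess(image, crop_size={"height": 112, "width": 112}, return_tensors="np")
print(outputs["pixel_values"].shape)  # (1, 3, 224, 224); the stray kwarg had no effect

Because validate_kwargs only logs a warning rather than raising, existing callers that pass extra kwargs keep working; they just become visible in the logs.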