From 015b6dfbf80fd0c958f8f27e8e78c89fd9a11568 Mon Sep 17 00:00:00 2001 From: Sebastiaan Vermeulen Date: Thu, 8 May 2025 11:51:15 +0200 Subject: [PATCH] Fix `pad` image transform for batched inputs (#37544) * fix: pad the batch axis with a `(0, 0)` pair instead of a bare `0` when the image is 4-dimensional * add batch dimension to expected output --- src/transformers/image_transforms.py | 2 +- tests/test_image_transforms.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/transformers/image_transforms.py b/src/transformers/image_transforms.py index 5b0ba3f9122..c8a7edd985c 100644 --- a/src/transformers/image_transforms.py +++ b/src/transformers/image_transforms.py @@ -751,7 +751,7 @@ def pad( values = ((0, 0), *values) if input_data_format == ChannelDimension.FIRST else (*values, (0, 0)) # Add additional padding if there's a batch dimension - values = (0, *values) if image.ndim == 4 else values + values = ((0, 0), *values) if image.ndim == 4 else values return values padding = _expand_for_data_format(padding) diff --git a/tests/test_image_transforms.py b/tests/test_image_transforms.py index 66b94c4ee72..3d3b84c7e81 100644 --- a/tests/test_image_transforms.py +++ b/tests/test_image_transforms.py @@ -578,6 +578,25 @@ class ImageTransformsTester(unittest.TestCase): ) ) + # Test that padding works on batched images + image = np.array( + [ + [[0, 1], [2, 3]], + ] + )[None, ...] + expected_image = np.array( + [ + [[0, 0], [0, 1], [2, 3]], + [[0, 0], [0, 0], [0, 0]], + ] + )[None, ...] + # fmt: on + self.assertTrue( + np.allclose( + expected_image, pad(image, ((0, 1), (1, 0)), mode="constant", input_data_format="channels_last") + ) + ) + @require_vision def test_convert_to_rgb(self): # Test that an RGBA image is converted to RGB