mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Add padding image transformation (#19838)
* Add padding transformation * Add in upstream changes * Update tests & docs * Code formatting tuples in docstring
This commit is contained in:
parent
c29a2f7c9c
commit
b98269425e
@ -29,6 +29,8 @@ Most of those are only useful if you are studying the code of the image processo
|
||||
|
||||
[[autodoc]] image_transforms.normalize
|
||||
|
||||
[[autodoc]] image_transforms.pad
|
||||
|
||||
[[autodoc]] image_transforms.rgb_to_id
|
||||
|
||||
[[autodoc]] image_transforms.rescale
|
||||
|
@ -18,7 +18,7 @@ from typing import Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers.utils import TensorType
|
||||
from transformers.utils import ExplicitEnum, TensorType
|
||||
from transformers.utils.import_utils import is_flax_available, is_tf_available, is_torch_available, is_vision_available
|
||||
|
||||
|
||||
@ -569,3 +569,100 @@ def id_to_rgb(id_map):
|
||||
color.append(id_map % 256)
|
||||
id_map //= 256
|
||||
return color
|
||||
|
||||
|
||||
class PaddingMode(ExplicitEnum):
|
||||
"""
|
||||
Enum class for the different padding modes to use when padding images.
|
||||
"""
|
||||
|
||||
CONSTANT = "constant"
|
||||
REFLECT = "reflect"
|
||||
REPLICATE = "replicate"
|
||||
SYMMETRIC = "symmetric"
|
||||
|
||||
|
||||
def pad(
|
||||
image: np.ndarray,
|
||||
padding: Union[int, Tuple[int, int], Iterable[Tuple[int, int]]],
|
||||
mode: PaddingMode = PaddingMode.CONSTANT,
|
||||
constant_values: Union[float, Iterable[float]] = 0.0,
|
||||
data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Pads the `image` with the specified (height, width) `padding` and `mode`.
|
||||
|
||||
Args:
|
||||
image (`np.ndarray`):
|
||||
The image to pad.
|
||||
padding (`int` or `Tuple[int, int]` or `Iterable[Tuple[int, int]]`):
|
||||
Padding to apply to the edges of the height, width axes. Can be one of three formats:
|
||||
- `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
|
||||
- `((before, after),)` yields same before and after pad for height and width.
|
||||
- `(pad,)` or int is a shortcut for before = after = pad width for all axes.
|
||||
mode (`PaddingMode`):
|
||||
The padding mode to use. Can be one of:
|
||||
- `"constant"`: pads with a constant value.
|
||||
- `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
|
||||
vector along each axis.
|
||||
- `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
|
||||
- `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
|
||||
constant_values (`float` or `Iterable[float]`, *optional*):
|
||||
The value to use for the padding if `mode` is `"constant"`.
|
||||
data_format (`str` or `ChannelDimension`, *optional*):
|
||||
The channel dimension format for the output image. Can be one of:
|
||||
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
|
||||
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
|
||||
If unset, will use same as the input image.
|
||||
input_data_format (`str` or `ChannelDimension`, *optional*):
|
||||
The channel dimension format for the input image. Can be one of:
|
||||
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
|
||||
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
|
||||
If unset, will use the inferred format of the input image.
|
||||
|
||||
Returns:
|
||||
`np.ndarray`: The padded image.
|
||||
|
||||
"""
|
||||
if input_data_format is None:
|
||||
input_data_format = infer_channel_dimension_format(image)
|
||||
|
||||
def _expand_for_data_format(values):
|
||||
"""
|
||||
Convert values to be in the format expected by np.pad based on the data format.
|
||||
"""
|
||||
if isinstance(values, (int, float)):
|
||||
values = ((values, values), (values, values))
|
||||
elif isinstance(values, tuple) and len(values) == 1:
|
||||
values = ((values[0], values[0]), (values[0], values[0]))
|
||||
elif isinstance(values, tuple) and len(values) == 2 and isinstance(values[0], int):
|
||||
values = (values, values)
|
||||
elif isinstance(values, tuple) and len(values) == 2 and isinstance(values[0], tuple):
|
||||
values = values
|
||||
else:
|
||||
raise ValueError(f"Unsupported format: {values}")
|
||||
|
||||
# add 0 for channel dimension
|
||||
values = ((0, 0), *values) if input_data_format == ChannelDimension.FIRST else (*values, (0, 0))
|
||||
|
||||
# Add additional padding if there's a batch dimension
|
||||
values = (0, *values) if image.ndim == 4 else values
|
||||
return values
|
||||
|
||||
padding = _expand_for_data_format(padding)
|
||||
|
||||
if mode == PaddingMode.CONSTANT:
|
||||
constant_values = _expand_for_data_format(constant_values)
|
||||
image = np.pad(image, padding, mode="constant", constant_values=constant_values)
|
||||
elif mode == PaddingMode.REFLECT:
|
||||
image = np.pad(image, padding, mode="reflect")
|
||||
elif mode == PaddingMode.REPLICATE:
|
||||
image = np.pad(image, padding, mode="edge")
|
||||
elif mode == PaddingMode.SYMMETRIC:
|
||||
image = np.pad(image, padding, mode="symmetric")
|
||||
else:
|
||||
raise ValueError(f"Invalid padding mode: {mode}")
|
||||
|
||||
image = to_channel_dimension_format(image, data_format) if data_format is not None else image
|
||||
return image
|
||||
|
@ -41,6 +41,7 @@ if is_vision_available():
|
||||
get_resize_output_image_size,
|
||||
id_to_rgb,
|
||||
normalize,
|
||||
pad,
|
||||
resize,
|
||||
rgb_to_id,
|
||||
to_channel_dimension_format,
|
||||
@ -289,3 +290,127 @@ class ImageTransformsTester(unittest.TestCase):
|
||||
]
|
||||
)
|
||||
self.assertTrue(np.allclose(id_to_rgb(id_array), color))
|
||||
|
||||
def test_pad(self):
|
||||
# fmt: off
|
||||
image = np.array([[
|
||||
[0, 1],
|
||||
[2, 3],
|
||||
]])
|
||||
# fmt: on
|
||||
|
||||
# Test that exception is raised if unknown padding mode is specified
|
||||
with self.assertRaises(ValueError):
|
||||
pad(image, 10, mode="unknown")
|
||||
|
||||
# Test that exception is raised if invalid padding is specified
|
||||
with self.assertRaises(ValueError):
|
||||
# Cannot pad on channel dimension
|
||||
pad(image, (5, 10, 10))
|
||||
|
||||
# Test image is padded equally on all sides is padding is an int
|
||||
# fmt: off
|
||||
expected_image = np.array([
|
||||
[[0, 0, 0, 0],
|
||||
[0, 0, 1, 0],
|
||||
[0, 2, 3, 0],
|
||||
[0, 0, 0, 0]],
|
||||
])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, 1)))
|
||||
|
||||
# Test the left and right of each axis is padded (pad_left, pad_right)
|
||||
# fmt: off
|
||||
expected_image = np.array(
|
||||
[[0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 1, 0],
|
||||
[0, 0, 2, 3, 0],
|
||||
[0, 0, 0, 0, 0]])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, (2, 1))))
|
||||
|
||||
# Test only one axis is padded (pad_left, pad_right)
|
||||
# fmt: off
|
||||
expected_image = np.array([[
|
||||
[9, 9],
|
||||
[9, 9],
|
||||
[0, 1],
|
||||
[2, 3],
|
||||
[9, 9]
|
||||
]])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, ((2, 1), (0, 0)), constant_values=9)))
|
||||
|
||||
# Test padding with a constant value
|
||||
# fmt: off
|
||||
expected_image = np.array([[
|
||||
[8, 8, 0, 1, 9],
|
||||
[8, 8, 2, 3, 9],
|
||||
[8, 8, 7, 7, 9],
|
||||
[8, 8, 7, 7, 9]
|
||||
]])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, ((0, 2), (2, 1)), constant_values=((6, 7), (8, 9)))))
|
||||
|
||||
# fmt: off
|
||||
image = np.array([[
|
||||
[0, 1, 2],
|
||||
[3, 4, 5],
|
||||
[6, 7, 8],
|
||||
]])
|
||||
# fmt: on
|
||||
|
||||
# Test padding with PaddingMode.REFLECT
|
||||
# fmt: off
|
||||
expected_image = np.array([[
|
||||
[2, 1, 0, 1, 2, 1],
|
||||
[5, 4, 3, 4, 5, 4],
|
||||
[8, 7, 6, 7, 8, 7],
|
||||
[5, 4, 3, 4, 5, 4],
|
||||
[2, 1, 0, 1, 2, 1],
|
||||
]])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, ((0, 2), (2, 1)), mode="reflect")))
|
||||
|
||||
# Test padding with PaddingMode.REPLICATE
|
||||
# fmt: off
|
||||
expected_image = np.array([[
|
||||
[0, 0, 0, 1, 2, 2],
|
||||
[3, 3, 3, 4, 5, 5],
|
||||
[6, 6, 6, 7, 8, 8],
|
||||
[6, 6, 6, 7, 8, 8],
|
||||
[6, 6, 6, 7, 8, 8],
|
||||
]])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, ((0, 2), (2, 1)), mode="replicate")))
|
||||
|
||||
# Test padding with PaddingMode.SYMMETRIC
|
||||
# fmt: off
|
||||
expected_image = np.array([[
|
||||
[1, 0, 0, 1, 2, 2],
|
||||
[4, 3, 3, 4, 5, 5],
|
||||
[7, 6, 6, 7, 8, 8],
|
||||
[7, 6, 6, 7, 8, 8],
|
||||
[4, 3, 3, 4, 5, 5],
|
||||
]])
|
||||
# fmt: on
|
||||
self.assertTrue(np.allclose(expected_image, pad(image, ((0, 2), (2, 1)), mode="symmetric")))
|
||||
|
||||
# Test we can specify the output data format
|
||||
# Test padding with PaddingMode.REFLECT
|
||||
# fmt: off
|
||||
image = np.array([[
|
||||
[0, 1],
|
||||
[2, 3],
|
||||
]])
|
||||
expected_image = np.array([
|
||||
[[0], [1], [0], [1], [0]],
|
||||
[[2], [3], [2], [3], [2]],
|
||||
[[0], [1], [0], [1], [0]],
|
||||
[[2], [3], [2], [3], [2]]
|
||||
])
|
||||
# fmt: on
|
||||
self.assertTrue(
|
||||
np.allclose(expected_image, pad(image, ((0, 2), (2, 1)), mode="reflect", data_format="channels_last"))
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user