Update InstructBLIP & Align values after rescale update (#25209)
* Update InstructBLIP values

  Note: the tests are not independent. Running a test independently produces different logits compared to running all the integration tests.

* Update test values after rescale update
* Remove leftover commented-out code
* Revert to previous rescaling logic
* Update rescale tests
commit 30409af6e1
parent 15082a9dc6
@@ -155,10 +155,11 @@ class EfficientNetImageProcessor(BaseImageProcessor):
         """
         Rescale an image by a scale factor.
 
-        If offset is True, the image is rescaled between [-1, 1].
-            image = image * scale * 2 - 1
+        If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
+        1/127.5, the image is rescaled between [-1, 1].
+            image = image * scale - 1
 
-        If offset is False, the image is rescaled between [0, 1].
+        If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
             image = image * scale
 
         Args:
@@ -171,7 +172,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
             data_format (`str` or `ChannelDimension`, *optional*):
                 The channel dimension format of the image. If not provided, it will be the same as the input image.
         """
-        scale = scale * 2 if offset else scale
         rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)
 
         if offset:
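For reference, a minimal sketch of the semantics the updated docstring describes. `rescale_with_offset` below is a hypothetical stand-in for the processor's `rescale` method, not the library implementation: the doubling formerly applied inside `rescale` is now folded into the `scale` the caller passes (1/127.5 rather than 1/255).

import numpy as np

def rescale_with_offset(image: np.ndarray, scale: float, offset: bool = True) -> np.ndarray:
    # Multiply by `scale`, then shift down by 1 when `offset` is True.
    rescaled = image.astype(np.float32) * scale
    return rescaled - 1 if offset else rescaled

image = np.array([0, 127.5, 255], dtype=np.float32)
print(rescale_with_offset(image, scale=1 / 127.5))              # [-1.  0.  1.]
print(rescale_with_offset(image, scale=1 / 255, offset=False))  # [0.  0.5  1.]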
@@ -179,10 +179,11 @@ class VivitImageProcessor(BaseImageProcessor):
         """
         Rescale an image by a scale factor.
 
-        If offset is True, the image is rescaled between [-1, 1].
-            image = image * scale * 2 - 1
+        If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
+        1/127.5, the image is rescaled between [-1, 1].
+            image = image * scale - 1
 
-        If offset is False, the image is rescaled between [0, 1].
+        If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
             image = image * scale
 
         Args:
@@ -195,7 +196,6 @@ class VivitImageProcessor(BaseImageProcessor):
             data_format (`str` or `ChannelDimension`, *optional*):
                 The channel dimension format of the image. If not provided, it will be the same as the input image.
         """
-        scale = scale * 2 if offset else scale
         rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)
 
         if offset:
@@ -200,8 +200,8 @@ class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase
 
         image_processor = self.image_processing_class(**self.image_processor_dict)
 
-        rescaled_image = image_processor.rescale(image, scale=1 / 255)
-        expected_image = (image * (2 / 255.0)).astype(np.float32) - 1
+        rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
+        expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
         self.assertTrue(np.allclose(rescaled_image, expected_image))
 
         rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)
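The old and new test expectations are numerically identical, since 2/255 equals 1/127.5; only the scale the caller passes changes. A quick sanity check (assuming numpy and an arbitrary random image):

import numpy as np

image = np.random.default_rng(0).integers(0, 256, size=(3, 16, 16), dtype=np.uint8)

# Doubling the scale inside rescale (2/255) is the same factor as
# passing 1/127.5 directly, so the expected values do not change.
assert 2 / 255.0 == 1 / 127.5
old_expected = (image * (2 / 255.0)).astype(np.float32) - 1
new_expected = (image * (1 / 127.5)).astype(np.float32) - 1
assert np.allclose(old_expected, new_expected)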
@@ -538,7 +538,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
             logits = model(**inputs).logits
 
         expected_slice = torch.tensor(
-            [[-3.5020, -12.3281, 8.4453], [-5.1406, -11.9609, 7.8711], [-4.0430, -13.4375, 9.1172]],
+            [[-3.4727, -11.8203, 8.3828], [-5.1172, -11.3438, 7.7656], [-4.0742, -13.4688, 9.1953]],
             device=torch_device,
         )
         self.assertTrue(torch.allclose(logits[0, :3, :3].float(), expected_slice, atol=1e-3))
@@ -548,12 +548,12 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
         generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
 
         # fmt: off
-        expected_outputs = [ 2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 1623, 263, 19587, 4272, 11952, 29889]
+        expected_outputs = [2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889]
         # fmt: on
         self.assertEqual(outputs[0].tolist(), expected_outputs)
         self.assertEqual(
             generated_text,
-            "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving down a busy city street.",
+            "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.",
         )
 
     def test_inference_flant5_xl(self):
@@ -219,8 +219,8 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase
 
         image_processor = self.image_processing_class(**self.image_processor_dict)
 
-        rescaled_image = image_processor.rescale(image, scale=1 / 255)
-        expected_image = (image * (2 / 255.0)).astype(np.float32) - 1
+        rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
+        expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
         self.assertTrue(np.allclose(rescaled_image, expected_image))
 
         rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)