Update InstructBLIP & Align values after rescale update (#25209)

* Update InstructBLIP values
Note: the tests are not independent. Running the test independently produces different logits compared to running all the integration tests.

* Update test values after rescale update

* Remove leftover commented-out code

* Revert to previous rescaling logic

* Update rescale tests
amyeroberts 2023-08-03 11:01:10 +01:00 committed by GitHub
parent 15082a9dc6
commit 30409af6e1
5 changed files with 15 additions and 15 deletions


@@ -155,10 +155,11 @@ class EfficientNetImageProcessor(BaseImageProcessor):
         """
         Rescale an image by a scale factor.
 
-        If offset is True, the image is rescaled between [-1, 1].
-            image = image * scale * 2 - 1
+        If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
+        1/127.5, the image is rescaled between [-1, 1].
+            image = image * scale - 1
 
-        If offset is False, the image is rescaled between [0, 1].
+        If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
             image = image * scale
 
         Args:
@@ -171,7 +172,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
             data_format (`str` or `ChannelDimension`, *optional*):
                 The channel dimension format of the image. If not provided, it will be the same as the input image.
         """
-        scale = scale * 2 if offset else scale
         rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)
 
         if offset:
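
The same rescale change is mirrored in the VivitImageProcessor hunk below: `rescale` no longer doubles `scale` when `offset=True`, so callers must pass the full factor (1/127.5 instead of 1/255) to keep the [-1, 1] output range. A minimal standalone sketch of the equivalence, assuming only NumPy (it does not use the processors themselves):

    # Sketch only: old vs. new offset-rescale behaviour for a uint8 image with values in [0, 255].
    import numpy as np

    image = np.arange(0, 256, dtype=np.uint8)

    old = image * ((1 / 255) * 2) - 1   # previous logic: rescale() doubled the scale internally when offset=True
    new = image * (1 / 127.5) - 1       # new logic: caller supplies the full factor, rescale() only subtracts 1

    print(np.allclose(old, new))        # True - both map [0, 255] onto the [-1, 1] range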


@@ -179,10 +179,11 @@ class VivitImageProcessor(BaseImageProcessor):
         """
         Rescale an image by a scale factor.
 
-        If offset is True, the image is rescaled between [-1, 1].
-            image = image * scale * 2 - 1
+        If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
+        1/127.5, the image is rescaled between [-1, 1].
+            image = image * scale - 1
 
-        If offset is False, the image is rescaled between [0, 1].
+        If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
             image = image * scale
 
         Args:
@@ -195,7 +196,6 @@ class VivitImageProcessor(BaseImageProcessor):
             data_format (`str` or `ChannelDimension`, *optional*):
                 The channel dimension format of the image. If not provided, it will be the same as the input image.
         """
-        scale = scale * 2 if offset else scale
        rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)
 
         if offset:


@@ -200,8 +200,8 @@ class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase):
         image_processor = self.image_processing_class(**self.image_processor_dict)
 
-        rescaled_image = image_processor.rescale(image, scale=1 / 255)
-        expected_image = (image * (2 / 255.0)).astype(np.float32) - 1
+        rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
+        expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
         self.assertTrue(np.allclose(rescaled_image, expected_image))
 
         rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)


@@ -538,7 +538,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
             logits = model(**inputs).logits
 
         expected_slice = torch.tensor(
-            [[-3.5020, -12.3281, 8.4453], [-5.1406, -11.9609, 7.8711], [-4.0430, -13.4375, 9.1172]],
+            [[-3.4727, -11.8203, 8.3828], [-5.1172, -11.3438, 7.7656], [-4.0742, -13.4688, 9.1953]],
             device=torch_device,
         )
         self.assertTrue(torch.allclose(logits[0, :3, :3].float(), expected_slice, atol=1e-3))
@@ -548,12 +548,12 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
         generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
 
         # fmt: off
-        expected_outputs = [ 2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 1623, 263, 19587, 4272, 11952, 29889]
+        expected_outputs = [2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889]
         # fmt: on
         self.assertEqual(outputs[0].tolist(), expected_outputs)
         self.assertEqual(
             generated_text,
-            "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving down a busy city street.",
+            "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.",
         )
 
     def test_inference_flant5_xl(self):
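
As noted in the commit message, these InstructBLIP integration tests are not independent: the expected logits and token ids above match only when the full integration suite runs together. A rough sketch of how one might reproduce that locally; the test file path and the RUN_SLOW switch are assumptions about the repository layout, not part of this diff:

    # Hypothetical reproduction sketch - path and env var are assumptions, not from the diff.
    import os
    import pytest

    os.environ["RUN_SLOW"] = "1"  # integration tests are typically skipped unless slow tests are enabled
    # Run the whole integration class rather than a single test, since the expected
    # values above were recorded with all integration tests running together.
    pytest.main(["tests/models/instructblip/test_modeling_instructblip.py", "-k", "IntegrationTest", "-v"])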


@@ -219,8 +219,8 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
         image_processor = self.image_processing_class(**self.image_processor_dict)
 
-        rescaled_image = image_processor.rescale(image, scale=1 / 255)
-        expected_image = (image * (2 / 255.0)).astype(np.float32) - 1
+        rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
+        expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
         self.assertTrue(np.allclose(rescaled_image, expected_image))
 
         rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)