Update InstructBLIP & Align values after rescale update (#25209)

* Update InstructBLIP values
Note: the tests are not independent. Running the test independently produces different logits compared to running all the integration tests

* Update test values after rescale update

* Remove left over commented out code

* Revert to previous rescaling logic

* Update rescale tests
This commit is contained in:
amyeroberts 2023-08-03 11:01:10 +01:00 committed by GitHub
parent 15082a9dc6
commit 30409af6e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 15 additions and 15 deletions

View File

@@ -155,10 +155,11 @@ class EfficientNetImageProcessor(BaseImageProcessor):
""" """
Rescale an image by a scale factor. Rescale an image by a scale factor.
If offset is True, the image is rescaled between [-1, 1]. If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
image = image * scale * 2 - 1 1/127.5, the image is rescaled between [-1, 1].
image = image * scale - 1
If offset is False, the image is rescaled between [0, 1]. If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
image = image * scale image = image * scale
Args: Args:
@@ -171,7 +172,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
data_format (`str` or `ChannelDimension`, *optional*): data_format (`str` or `ChannelDimension`, *optional*):
The channel dimension format of the image. If not provided, it will be the same as the input image. The channel dimension format of the image. If not provided, it will be the same as the input image.
""" """
scale = scale * 2 if offset else scale
rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs) rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)
if offset: if offset:

View File

@@ -179,10 +179,11 @@ class VivitImageProcessor(BaseImageProcessor):
""" """
Rescale an image by a scale factor. Rescale an image by a scale factor.
If offset is True, the image is rescaled between [-1, 1]. If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is
image = image * scale * 2 - 1 1/127.5, the image is rescaled between [-1, 1].
image = image * scale - 1
If offset is False, the image is rescaled between [0, 1]. If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1].
image = image * scale image = image * scale
Args: Args:
@@ -195,7 +196,6 @@ class VivitImageProcessor(BaseImageProcessor):
data_format (`str` or `ChannelDimension`, *optional*): data_format (`str` or `ChannelDimension`, *optional*):
The channel dimension format of the image. If not provided, it will be the same as the input image. The channel dimension format of the image. If not provided, it will be the same as the input image.
""" """
scale = scale * 2 if offset else scale
rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs) rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs)
if offset: if offset:

View File

@@ -200,8 +200,8 @@ class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.Te
image_processor = self.image_processing_class(**self.image_processor_dict) image_processor = self.image_processing_class(**self.image_processor_dict)
rescaled_image = image_processor.rescale(image, scale=1 / 255) rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
expected_image = (image * (2 / 255.0)).astype(np.float32) - 1 expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
self.assertTrue(np.allclose(rescaled_image, expected_image)) self.assertTrue(np.allclose(rescaled_image, expected_image))
rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False) rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)

View File

@@ -538,7 +538,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
logits = model(**inputs).logits logits = model(**inputs).logits
expected_slice = torch.tensor( expected_slice = torch.tensor(
[[-3.5020, -12.3281, 8.4453], [-5.1406, -11.9609, 7.8711], [-4.0430, -13.4375, 9.1172]], [[-3.4727, -11.8203, 8.3828], [-5.1172, -11.3438, 7.7656], [-4.0742, -13.4688, 9.1953]],
device=torch_device, device=torch_device,
) )
self.assertTrue(torch.allclose(logits[0, :3, :3].float(), expected_slice, atol=1e-3)) self.assertTrue(torch.allclose(logits[0, :3, :3].float(), expected_slice, atol=1e-3))
@@ -548,12 +548,12 @@ class InstructBlipModelIntegrationTest(unittest.TestCase):
generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip() generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
# fmt: off # fmt: off
expected_outputs = [ 2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 1623, 263, 19587, 4272, 11952, 29889] expected_outputs = [2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889]
# fmt: on # fmt: on
self.assertEqual(outputs[0].tolist(), expected_outputs) self.assertEqual(outputs[0].tolist(), expected_outputs)
self.assertEqual( self.assertEqual(
generated_text, generated_text,
"The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving down a busy city street.", "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.",
) )
def test_inference_flant5_xl(self): def test_inference_flant5_xl(self):

View File

@@ -219,8 +219,8 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase
image_processor = self.image_processing_class(**self.image_processor_dict) image_processor = self.image_processing_class(**self.image_processor_dict)
rescaled_image = image_processor.rescale(image, scale=1 / 255) rescaled_image = image_processor.rescale(image, scale=1 / 127.5)
expected_image = (image * (2 / 255.0)).astype(np.float32) - 1 expected_image = (image * (1 / 127.5)).astype(np.float32) - 1
self.assertTrue(np.allclose(rescaled_image, expected_image)) self.assertTrue(np.allclose(rescaled_image, expected_image))
rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False) rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)