Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-16 19:18:24 +06:00)
Update Granite Vision Model Path / Tests (#35998)
* Update granite vision model path

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>

* Enable granite vision test

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>

---------

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>
This commit is contained in:
parent 9d2056f12b
commit e284c7e954
@@ -31,13 +31,8 @@ Tips:
 Sample inference:
 ```python
 from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
-from PIL import Image
-import requests
 
-# Note: These docs were written prior to the public model release,
-# and this path is subject to change.
-# Please see https://huggingface.co/ibm-granite for the current model list.
-model_path = "ibm-granite/granite-3.1-2b-instruct-vision"
+model_path = "ibm-granite/granite-vision-3.1-2b-preview"
 processor = LlavaNextProcessor.from_pretrained(model_path)
 
 model = LlavaNextForConditionalGeneration.from_pretrained(model_path).to("cuda")
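The docs snippet above stops at model loading. For context, here is a minimal end-to-end sketch of how the updated checkpoint path could be exercised; the image URL is a placeholder, and the prompt format is borrowed from the test_granite_vision integration test changed in this same commit, so treat this as an illustration rather than the official docs example:

```python
import requests
from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration

model_path = "ibm-granite/granite-vision-3.1-2b-preview"
processor = LlavaNextProcessor.from_pretrained(model_path)
model = LlavaNextForConditionalGeneration.from_pretrained(model_path).to("cuda")

# Placeholder image URL -- substitute any RGB image you want described.
url = "https://www.ilankelman.org/stopsigns/australia.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Prompt format taken from test_granite_vision below.
prompt = "<|user|>\n<image>\nWhat is shown in this image?\n<|assistant|>\n"
inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda")

output = model.generate(**inputs, max_new_tokens=30)
print(processor.decode(output[0], skip_special_tokens=True))
```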
@@ -586,15 +586,13 @@ class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
             EXPECTED_DECODED_TEXT,
         )
 
-    @unittest.skip(reason="Granite multimodal [vision] models are not yet released")
     @slow
     def test_granite_vision(self):
         """
         Check the expected output of a granite vision model, which leverages
         multiple vision feature layers and a visual encoder with no CLS (siglip).
         """
-        # TODO @alex-jw-brooks - update the path and enable this test once the 2b model is released
-        granite_model_path = "llava-granite-2b"
+        granite_model_path = "ibm-granite/granite-vision-3.1-2b-preview"
         model = LlavaNextForConditionalGeneration.from_pretrained(granite_model_path)
         self.processor = AutoProcessor.from_pretrained(granite_model_path)
         prompt = "<|user|>\n<image>\nWhat is shown in this image?\n<|assistant|>\n"
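The docstring above notes that granite vision leverages multiple vision feature layers and a SigLIP encoder with no CLS token. A hedged sketch of what that can look like at the config level follows; the layer indices are invented for illustration, and it assumes LlavaNextConfig's vision_feature_layer accepts a list of layer indices (the mechanism the docstring describes) rather than a single int:

```python
from transformers import LlavaNextConfig

# Illustrative only: the real checkpoint ships its own config, and the layer
# indices below are made up. The idea is that hidden states from several
# vision encoder layers are used as image features instead of a single layer.
config = LlavaNextConfig(
    vision_feature_layer=[-24, -20, -12, -1],
    # "full" keeps all patch embeddings; siglip has no CLS token to strip.
    vision_feature_select_strategy="full",
)
print(config.vision_feature_layer)
```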
@@ -602,7 +600,7 @@ class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
 
         # verify generation
         output = model.generate(**inputs, max_new_tokens=30)
-        EXPECTED_DECODED_TEXT = "<|user|>\n\nWhat is shown in this image?\n<|assistant|>\nThe image depicts a diagram."
+        EXPECTED_DECODED_TEXT = "<|user|>\n\nWhat is shown in this image?\n<|assistant|>\nThe image displays a radar chart comparing the performance of various machine learning models."  # fmt: skip
         self.assertEqual(
             self.processor.decode(output[0], skip_special_tokens=True),
             EXPECTED_DECODED_TEXT,
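Note that with the @unittest.skip decorator removed, the test is enabled but still gated by @slow: in transformers, slow-marked tests are skipped unless the RUN_SLOW=1 environment variable is set, so this change does not affect the default CI run. Something like `RUN_SLOW=1 python -m pytest -k test_granite_vision` should exercise it locally (the exact test file path is not shown in this diff, so locate it by the class name).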