From e284c7e954abe12c34b50461c17f8115a0afe115 Mon Sep 17 00:00:00 2001
From: Alex Brooks <alex.brooks@ibm.com>
Date: Mon, 3 Feb 2025 12:06:03 -0700
Subject: [PATCH] Update Granite Vision Model Path / Tests (#35998)

* Update granite vision model path

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>

* Enable granite vision test

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>

---------

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>
---
 docs/source/en/model_doc/granitevision.md           | 7 +------
 tests/models/llava_next/test_modeling_llava_next.py | 6 ++----
 2 files changed, 3 insertions(+), 10 deletions(-)
diff --git a/docs/source/en/model_doc/granitevision.md b/docs/source/en/model_doc/granitevision.md
index 42f9df2ee31..e11c806ae67 100644
--- a/docs/source/en/model_doc/granitevision.md
+++ b/docs/source/en/model_doc/granitevision.md
@@ -31,13 +31,8 @@ Tips:
 Sample inference:
 ```python
 from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
-from PIL import Image
-import requests
 
-# Note: These docs were written prior to the public model release,
-# and this path is subject to change.
-# Please see https://huggingface.co/ibm-granite for the current model list.
-model_path = "ibm-granite/granite-3.1-2b-instruct-vision"
+model_path = "ibm-granite/granite-vision-3.1-2b-preview"
 processor = LlavaNextProcessor.from_pretrained(model_path)
 
 model = LlavaNextForConditionalGeneration.from_pretrained(model_path).to("cuda")
diff --git a/tests/models/llava_next/test_modeling_llava_next.py b/tests/models/llava_next/test_modeling_llava_next.py
index ce86a569581..acfd3fde631 100644
--- a/tests/models/llava_next/test_modeling_llava_next.py
+++ b/tests/models/llava_next/test_modeling_llava_next.py
@@ -586,15 +586,13 @@ class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
             EXPECTED_DECODED_TEXT,
         )
 
-    @unittest.skip(reason="Granite multimodal [vision] models are not yet released")
     @slow
     def test_granite_vision(self):
         """
         Check the expected output of a granite vision model, which leverages
         multiple vision feature layers and a visual encoder with no CLS (siglip).
         """
-        # TODO @alex-jw-brooks - update the path and enable this test once the 2b model is released
-        granite_model_path = "llava-granite-2b"
+        granite_model_path = "ibm-granite/granite-vision-3.1-2b-preview"
         model = LlavaNextForConditionalGeneration.from_pretrained(granite_model_path)
         self.processor = AutoProcessor.from_pretrained(granite_model_path)
         prompt = "<|user|>\n<image>\nWhat is shown in this image?\n<|assistant|>\n"
@@ -602,7 +600,7 @@ class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
 
         # verify generation
         output = model.generate(**inputs, max_new_tokens=30)
-        EXPECTED_DECODED_TEXT = "<|user|>\n\nWhat is shown in this image?\n<|assistant|>\nThe image depicts a diagram."
+        EXPECTED_DECODED_TEXT = "<|user|>\n\nWhat is shown in this image?\n<|assistant|>\nThe image displays a radar chart comparing the performance of various machine learning models."  # fmt: skip
         self.assertEqual(
             self.processor.decode(output[0], skip_special_tokens=True),
             EXPECTED_DECODED_TEXT,