diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
index 4db6f51826f..0dd64f86d30 100644
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -33,6 +33,7 @@ RUN echo torch=$VERSION
 RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
 
 RUN python3 -m pip install --no-cache-dir -U tensorflow
+RUN python3 -m pip install --no-cache-dir -U tensorflow_probability
 RUN python3 -m pip uninstall -y flax jax
 
 # Use installed torch version for `torch-scatter` to avid to deal with PYTORCH='pre'.
diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py
index 8c4053a2c73..45bc8b8ec3b 100644
--- a/tests/models/groupvit/test_modeling_tf_groupvit.py
+++ b/tests/models/groupvit/test_modeling_tf_groupvit.py
@@ -26,7 +26,13 @@ import numpy as np
 import requests
 
 from transformers import GroupViTConfig, GroupViTTextConfig, GroupViTVisionConfig
-from transformers.testing_utils import is_pt_tf_cross_test, require_tf, require_vision, slow
+from transformers.testing_utils import (
+    is_pt_tf_cross_test,
+    require_tensorflow_probability,
+    require_tf,
+    require_vision,
+    slow,
+)
 from transformers.utils import is_tf_available, is_vision_available
 
 from ...test_configuration_common import ConfigTester
@@ -155,6 +161,16 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
     def test_inputs_embeds(self):
         pass
 
+    """
+    During saving, TensorFlow will also run with `training=True`, which triggers `gumbel_softmax` and therefore
+    requires `tensorflow-probability`.
+    """
+
+    @require_tensorflow_probability
+    @slow
+    def test_saved_model_creation(self):
+        super().test_saved_model_creation()
+
     @unittest.skip(reason="GroupViT does not use inputs_embeds")
     def test_graph_mode_with_inputs_embeds(self):
         pass
@@ -295,6 +311,10 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
         model = TFGroupViTVisionModel.from_pretrained(model_name)
         self.assertIsNotNone(model)
 
+    @unittest.skip(
+        "TFGroupViTVisionModel does not convert `hidden_states` and `attentions` to tensors as they are all of"
+        " different dimensions, and we get a `Got a non-Tensor value` error when saving the model."
+    )
     @slow
     def test_saved_model_creation_extended(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -578,6 +598,10 @@ class TFGroupViTModelTest(TFModelTesterMixin, unittest.TestCase):
     def test_model_common_attributes(self):
         pass
 
+    @require_tensorflow_probability
+    def test_keras_fit(self):
+        super().test_keras_fit()
+
     @is_pt_tf_cross_test
     def test_pt_tf_model_equivalence(self):
         # `GroupViT` computes some indices using argmax, uses them as
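
For context on the new `@require_tensorflow_probability` markers: the `gumbel_softmax` path mentioned in the test docstring needs `tensorflow-probability` because it samples Gumbel(0, 1) noise from `tfp.distributions`. The sketch below is a minimal, illustrative Gumbel-softmax in TensorFlow, not the actual TFGroupViT implementation; the helper name and signature are assumptions for illustration only.

import tensorflow as tf
import tensorflow_probability as tfp

def gumbel_softmax(logits, tau=1.0):
    # Illustrative sketch only (hypothetical helper, not the TFGroupViT code):
    # perturb the logits with Gumbel(0, 1) noise drawn via tensorflow-probability,
    # then apply a temperature-scaled softmax to obtain soft one-hot samples.
    gumbels = tfp.distributions.Gumbel(loc=0.0, scale=1.0).sample(tf.shape(logits))
    return tf.nn.softmax((logits + gumbels) / tau, axis=-1)

# Example: relaxed samples over 4 classes for a batch of 2.
print(gumbel_softmax(tf.random.normal((2, 4)), tau=0.5))

Because saving a Keras model also traces the forward pass with `training=True`, this sampling path is exercised by `test_saved_model_creation` as well as `test_keras_fit`, which is why both tests are gated on the decorator.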