diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index f4e53099b19..1254761a2aa 100644
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -2965,11 +2965,13 @@ else:
     )
     _import_structure["models.auto"].extend(
         [
+            "TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
             "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
             "TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
             "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
             "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
             "TF_MODEL_FOR_MASKED_LM_MAPPING",
+            "TF_MODEL_FOR_MASK_GENERATION_MAPPING",
             "TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
             "TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING",
             "TF_MODEL_FOR_PRETRAINING_MAPPING",
@@ -6350,9 +6352,11 @@ if TYPE_CHECKING:
         TFAlbertPreTrainedModel,
     )
     from .models.auto import (
+        TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
         TF_MODEL_FOR_CAUSAL_LM_MAPPING,
         TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
         TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+        TF_MODEL_FOR_MASK_GENERATION_MAPPING,
         TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
         TF_MODEL_FOR_MASKED_LM_MAPPING,
         TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py
index 7ea870a9331..5af79da56f7 100644
--- a/src/transformers/models/auto/__init__.py
+++ b/src/transformers/models/auto/__init__.py
@@ -114,8 +114,10 @@ except OptionalDependencyNotAvailable:
     pass
 else:
     _import_structure["modeling_tf_auto"] = [
+        "TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
         "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
         "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
+        "TF_MODEL_FOR_MASK_GENERATION_MAPPING",
         "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
         "TF_MODEL_FOR_MASKED_LM_MAPPING",
         "TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
@@ -279,9 +281,11 @@ if TYPE_CHECKING:
     pass
 else:
     from .modeling_tf_auto import (
+        TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
         TF_MODEL_FOR_CAUSAL_LM_MAPPING,
         TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
         TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+        TF_MODEL_FOR_MASK_GENERATION_MAPPING,
         TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
         TF_MODEL_FOR_MASKED_LM_MAPPING,
         TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py
index 4a189174eee..4da32ae6034 100644
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@@ -216,6 +216,9 @@ class TFAlbertPreTrainedModel(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = None
+
+
 TF_MODEL_FOR_CAUSAL_LM_MAPPING = None
 
 
@@ -225,6 +228,9 @@ TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None
 TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
 
 
+TF_MODEL_FOR_MASK_GENERATION_MAPPING = None
+
+
 TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = None
 
 
diff --git a/tests/models/autoformer/test_modeling_autoformer.py b/tests/models/autoformer/test_modeling_autoformer.py
index 9df5bf236e0..9f0434689c4 100644
--- a/tests/models/autoformer/test_modeling_autoformer.py
+++ b/tests/models/autoformer/test_modeling_autoformer.py
@@ -25,6 +25,7 @@ from transformers.testing_utils import require_torch, slow, torch_device
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
+from ...test_pipeline_mixin import PipelineTesterMixin
 
 
 TOLERANCE = 1e-4
@@ -201,9 +202,10 @@ class AutoformerModelTester:
 
 
 @require_torch
-class AutoformerModelTest(ModelTesterMixin, unittest.TestCase):
+class AutoformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (AutoformerModel, AutoformerForPrediction) if is_torch_available() else ()
     all_generative_model_classes = (AutoformerForPrediction,) if is_torch_available() else ()
+    pipeline_model_mapping = {"feature-extraction": AutoformerModel} if is_torch_available() else {}
     test_pruning = False
     test_head_masking = False
     test_missing_keys = False
diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py
index 398da6f5d09..a1693b75824 100644
--- a/tests/models/encodec/test_modeling_encodec.py
+++ b/tests/models/encodec/test_modeling_encodec.py
@@ -117,7 +117,7 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     test_pruning = False
     test_headmasking = False
     test_resize_embeddings = False
-    pipeline_model_mapping = {}
+    pipeline_model_mapping = {"feature-extraction": EncodecModel} if is_torch_available() else {}
     input_name = "input_values"
 
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
diff --git a/tests/models/git/test_modeling_git.py b/tests/models/git/test_modeling_git.py
index b6384ae15f9..45b4457fdcc 100644
--- a/tests/models/git/test_modeling_git.py
+++ b/tests/models/git/test_modeling_git.py
@@ -383,11 +383,22 @@ class GitModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
     all_model_classes = (GitModel, GitForCausalLM) if is_torch_available() else ()
     all_generative_model_classes = (GitForCausalLM,) if is_torch_available() else ()
     pipeline_model_mapping = (
-        {"feature-extraction": GitModel, "text-generation": GitForCausalLM} if is_torch_available() else {}
+        {"feature-extraction": GitModel, "image-to-text": GitForCausalLM, "text-generation": GitForCausalLM}
+        if is_torch_available()
+        else {}
     )
     fx_compatible = False
     test_torchscript = False
 
+    # `GitForCausalLM` doesn't fit into the image-to-text pipeline. We might need to override its `generate` function.
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name == "ImageToTextPipelineTests":
+            return True
+
+        return False
+
     # special case for GitForCausalLM model
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
index c2b7fe34ee7..7d2f35c8b94 100644
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -270,10 +270,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
         else ()
     )
     pipeline_model_mapping = (
-        {
-            "document-question-answering": LayoutLMv2ForQuestionAnswering,
-            "feature-extraction": LayoutLMv2Model,
-        }
+        {"document-question-answering": LayoutLMv2ForQuestionAnswering, "feature-extraction": LayoutLMv2Model}
         if is_torch_available()
         else {}
     )
diff --git a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
index c6a2a1bf37f..c458024f105 100644
--- a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
@@ -286,10 +286,7 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
         else ()
     )
     pipeline_model_mapping = (
-        {
-            "document-question-answering": LayoutLMv3ForQuestionAnswering,
-            "feature-extraction": LayoutLMv3Model,
-        }
+        {"document-question-answering": LayoutLMv3ForQuestionAnswering, "feature-extraction": LayoutLMv3Model}
         if is_torch_available()
         else {}
     )
diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
index a1e2cd59083..1bdb3e2648d 100644
--- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
@@ -278,13 +278,7 @@ class TFLayoutLMv3ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
         else ()
     )
     pipeline_model_mapping = (
-        {
-            "feature-extraction": TFLayoutLMv3Model,
-            "question-answering": TFLayoutLMv3ForQuestionAnswering,
-            "text-classification": TFLayoutLMv3ForSequenceClassification,
-            "token-classification": TFLayoutLMv3ForTokenClassification,
-            "zero-shot": TFLayoutLMv3ForSequenceClassification,
-        }
+        {"document-question-answering": TFLayoutLMv3ForQuestionAnswering, "feature-extraction": TFLayoutLMv3Model}
         if is_tf_available()
         else {}
     )
diff --git a/tests/models/timm_backbone/test_modeling_timm_backbone.py b/tests/models/timm_backbone/test_modeling_timm_backbone.py
index f58716e0f2f..145238c6bfd 100644
--- a/tests/models/timm_backbone/test_modeling_timm_backbone.py
+++ b/tests/models/timm_backbone/test_modeling_timm_backbone.py
@@ -32,6 +32,8 @@ if is_torch_available():
     from transformers import TimmBackbone, TimmBackboneConfig
 
 
+from ...test_pipeline_mixin import PipelineTesterMixin
+
 
 class TimmBackboneModelTester:
     def __init__(
@@ -95,8 +97,9 @@ class TimmBackboneModelTester:
 
 @require_torch
 @require_timm
-class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, unittest.TestCase):
+class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (TimmBackbone,) if is_torch_available() else ()
+    pipeline_model_mapping = {"feature-extraction": TimmBackbone} if is_torch_available() else {}
     test_resize_embeddings = False
     test_head_masking = False
     test_pruning = False
diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
index 391d8e8ce1f..3554d18957c 100644
--- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
@@ -322,7 +322,7 @@ class TFWav2Vec2ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
         (TFWav2Vec2Model, TFWav2Vec2ForCTC, TFWav2Vec2ForSequenceClassification) if is_tf_available() else ()
     )
     pipeline_model_mapping = (
-        {"feature-extraction": TFWav2Vec2Model, "audio-classification": TFWav2Vec2ForSequenceClassification}
+        {"audio-classification": TFWav2Vec2ForSequenceClassification, "feature-extraction": TFWav2Vec2Model}
         if is_tf_available()
         else {}
     )
diff --git a/tests/pipelines/test_pipelines_audio_classification.py b/tests/pipelines/test_pipelines_audio_classification.py
index 208690396c4..8f2e46e0a50 100644
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -16,7 +16,7 @@ import unittest
 
 import numpy as np
 
-from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
 from transformers.pipelines import AudioClassificationPipeline, pipeline
 from transformers.testing_utils import (
     is_pipeline_test,
@@ -31,9 +31,9 @@ from .test_pipelines_common import ANY
 
 
 @is_pipeline_test
-@require_torch
 class AudioClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
 
     def get_test_pipeline(self, model, tokenizer, processor):
         audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
diff --git a/tests/pipelines/test_pipelines_mask_generation.py b/tests/pipelines/test_pipelines_mask_generation.py
index 53775deda28..cf170390651 100644
--- a/tests/pipelines/test_pipelines_mask_generation.py
+++ b/tests/pipelines/test_pipelines_mask_generation.py
@@ -18,7 +18,12 @@ from typing import Dict
 
 import numpy as np
 
-from transformers import MODEL_FOR_MASK_GENERATION_MAPPING, is_vision_available, pipeline
+from transformers import (
+    MODEL_FOR_MASK_GENERATION_MAPPING,
+    TF_MODEL_FOR_MASK_GENERATION_MAPPING,
+    is_vision_available,
+    pipeline,
+)
 from transformers.pipelines import MaskGenerationPipeline
 from transformers.testing_utils import (
     is_pipeline_test,
@@ -58,6 +63,9 @@ class MaskGenerationPipelineTests(unittest.TestCase):
     model_mapping = dict(
         (list(MODEL_FOR_MASK_GENERATION_MAPPING.items()) if MODEL_FOR_MASK_GENERATION_MAPPING else [])
     )
+    tf_model_mapping = dict(
+        (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else [])
+    )
 
     def get_test_pipeline(self, model, tokenizer, processor):
         image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor)
@@ -66,7 +74,7 @@ class MaskGenerationPipelineTests(unittest.TestCase):
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
         ]
 
-    # TODO: Fix me @Arthur
+    # TODO: Implement me @Arthur
     def run_pipeline_test(self, mask_generator, examples):
         pass
diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py
index 05ea27121a5..1fa8f378e40 100644
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -17,6 +17,7 @@ import copy
 import json
 import os
 import random
+import unittest
from pathlib import Path from transformers.testing_utils import ( @@ -314,7 +315,6 @@ class PipelineTesterMixin: run_batch_test(pipeline, examples) @is_pipeline_test - @require_torch def test_pipeline_audio_classification(self): self.run_task_tests(task="audio-classification") @@ -366,6 +366,7 @@ class PipelineTesterMixin: def test_pipeline_image_to_text(self): self.run_task_tests(task="image-to-text") + @unittest.skip(reason="`run_pipeline_test` is currently not implemented.") @is_pipeline_test @require_vision @require_torch diff --git a/tests/utils/tiny_model_summary.json b/tests/utils/tiny_model_summary.json index a186bf17e19..2d0575612d6 100644 --- a/tests/utils/tiny_model_summary.json +++ b/tests/utils/tiny_model_summary.json @@ -1597,7 +1597,8 @@ "EfficientFormerImageProcessor" ], "model_classes": [ - "EfficientFormerForImageClassification" + "EfficientFormerForImageClassification", + "TFEfficientFormerForImageClassification" ], "sha": "ebadb628e12f268e321fcc756fa4606f7b5b3178" }, @@ -1607,7 +1608,8 @@ "EfficientFormerImageProcessor" ], "model_classes": [ - "EfficientFormerForImageClassificationWithTeacher" + "EfficientFormerForImageClassificationWithTeacher", + "TFEfficientFormerForImageClassificationWithTeacher" ], "sha": "1beabce6da9cb4ebbeafcd1ef23fac36b4a269e2" }, @@ -1617,7 +1619,8 @@ "EfficientFormerImageProcessor" ], "model_classes": [ - "EfficientFormerModel" + "EfficientFormerModel", + "TFEfficientFormerModel" ], "sha": "200fae5b875844d09c8a91d1c155b72b06a517f6" }, @@ -1736,6 +1739,16 @@ ], "sha": "312b532cbef26610d80f2bd008650160cae4f7a1" }, + "EncodecModel": { + "tokenizer_classes": [], + "processor_classes": [ + "EncodecFeatureExtractor" + ], + "model_classes": [ + "EncodecModel" + ], + "sha": "e14c5a2fd6529c85cd4ac5a05ee9e550ced6a006" + }, "EncoderDecoderModel": { "tokenizer_classes": [ "BertTokenizer", @@ -3888,6 +3901,36 @@ ], "sha": "b3a1452e7cb44b600b21ee14f3d5382366855a46" }, + "MobileViTV2ForImageClassification": { + "tokenizer_classes": [], + "processor_classes": [ + "MobileViTImageProcessor" + ], + "model_classes": [ + "MobileViTV2ForImageClassification" + ], + "sha": "25752b0967ad594341d1b685401450d7f698433c" + }, + "MobileViTV2ForSemanticSegmentation": { + "tokenizer_classes": [], + "processor_classes": [ + "MobileViTImageProcessor" + ], + "model_classes": [ + "MobileViTV2ForSemanticSegmentation" + ], + "sha": "13b953f50be33219d55a12f1098be38b88000897" + }, + "MobileViTV2Model": { + "tokenizer_classes": [], + "processor_classes": [ + "MobileViTImageProcessor" + ], + "model_classes": [ + "MobileViTV2Model" + ], + "sha": "2f46357659db2d6d54d870e28073deeea1c8cb64" + }, "MvpForCausalLM": { "tokenizer_classes": [ "MvpTokenizer", @@ -4452,6 +4495,16 @@ ], "sha": "83ec4d2d61ed62525ee033e13d144817beb29d19" }, + "Pix2StructForConditionalGeneration": { + "tokenizer_classes": [ + "T5TokenizerFast" + ], + "processor_classes": [ + "Pix2StructImageProcessor" + ], + "model_classes": [], + "sha": "42b3de00ad535076c4893e4ac5ae2d2748cc4ccb" + }, "PoolFormerForImageClassification": { "tokenizer_classes": [], "processor_classes": [ @@ -5123,7 +5176,8 @@ "SamImageProcessor" ], "model_classes": [ - "SamModel" + "SamModel", + "TFSamModel" ], "sha": "eca8651bc84e5ac3b1b62e784b744a6bd1b82575" },
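
Note: the test changes above all hinge on two hooks from `PipelineTesterMixin`. The Python sketch below is illustrative only, not the actual implementation in tests/test_pipeline_mixin.py (which also resolves tiny checkpoints, tokenizers, and processors, and derives the test-case name differently); `DummyModel`, `ExampleModelTest`, and the name-derivation logic are hypothetical. It shows how `pipeline_model_mapping` feeds the per-task pipeline tests and how `is_pipeline_test_to_skip` lets a model test opt out, as `GitModelTest` does for image-to-text:

import unittest


class PipelineTesterMixin:
    # Task name -> model class, e.g. {"feature-extraction": AutoformerModel}.
    pipeline_model_mapping = {}

    def run_task_tests(self, task):
        # No mapping entry for this task means the pipeline test is skipped.
        if task not in self.pipeline_model_mapping:
            self.skipTest(f"no model registered for task {task!r}")
        model_class = self.pipeline_model_mapping[task]
        # Hypothetical name derivation: "image-to-text" -> "ImageToTextPipelineTests".
        test_case_name = "".join(part.title() for part in task.split("-")) + "PipelineTests"
        if self.is_pipeline_test_to_skip(test_case_name, None, model_class, None, None):
            self.skipTest(f"{test_case_name} disabled for {model_class.__name__}")
        # ... the real mixin now builds a pipeline from a tiny model and runs it ...

    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        # Default: run everything. Model test classes (e.g. GitModelTest) override this hook.
        return False


class DummyModel:  # stand-in for a real model class such as AutoformerModel
    pass


class ExampleModelTest(PipelineTesterMixin, unittest.TestCase):
    pipeline_model_mapping = {"feature-extraction": DummyModel}

    def test_pipeline_feature_extraction(self):
        self.run_task_tests(task="feature-extraction")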