Update tiny models for pipeline testing. (#24364)

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-07-03 12:50:06 +06:00 · 2023-06-20 14:43:10 +02:00 · 2023-06-20 14:43:10 +02:00 · c23d131eab
commit c23d131eab
parent 56efbf4301
15 changed files with 110 additions and 29 deletions
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@ -2965,11 +2965,13 @@ else:
    )
    _import_structure["models.auto"].extend(
        [
+            "TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
            "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
            "TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
            "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
            "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
            "TF_MODEL_FOR_MASKED_LM_MAPPING",
+            "TF_MODEL_FOR_MASK_GENERATION_MAPPING",
            "TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
            "TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING",
            "TF_MODEL_FOR_PRETRAINING_MAPPING",
@ -6350,9 +6352,11 @@ if TYPE_CHECKING:
            TFAlbertPreTrainedModel,
        )
        from .models.auto import (
+            TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
            TF_MODEL_FOR_CAUSAL_LM_MAPPING,
            TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+            TF_MODEL_FOR_MASK_GENERATION_MAPPING,
            TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
            TF_MODEL_FOR_MASKED_LM_MAPPING,
            TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
--- a/src/transformers/models/auto/__init__.py
+++ b/src/transformers/models/auto/__init__.py
@ -114,8 +114,10 @@ except OptionalDependencyNotAvailable:
    pass
 else:
    _import_structure["modeling_tf_auto"] = [
+        "TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
        "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
        "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
+        "TF_MODEL_FOR_MASK_GENERATION_MAPPING",
        "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
        "TF_MODEL_FOR_MASKED_LM_MAPPING",
        "TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
@ -279,9 +281,11 @@ if TYPE_CHECKING:
        pass
    else:
        from .modeling_tf_auto import (
+            TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
            TF_MODEL_FOR_CAUSAL_LM_MAPPING,
            TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
            TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+            TF_MODEL_FOR_MASK_GENERATION_MAPPING,
            TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
            TF_MODEL_FOR_MASKED_LM_MAPPING,
            TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@ -216,6 +216,9 @@ class TFAlbertPreTrainedModel(metaclass=DummyObject):
        requires_backends(self, ["tf"])


+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = None
+
+
 TF_MODEL_FOR_CAUSAL_LM_MAPPING = None


@ -225,6 +228,9 @@ TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None
 TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None


+TF_MODEL_FOR_MASK_GENERATION_MAPPING = None
+
+
 TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = None


--- a/tests/models/autoformer/test_modeling_autoformer.py
+++ b/tests/models/autoformer/test_modeling_autoformer.py
@ -25,6 +25,7 @@ from transformers.testing_utils import require_torch, slow, torch_device

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
+from ...test_pipeline_mixin import PipelineTesterMixin


 TOLERANCE = 1e-4
@ -201,9 +202,10 @@ class AutoformerModelTester:


@require_torch
-class AutoformerModelTest(ModelTesterMixin, unittest.TestCase):
+class AutoformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (AutoformerModel, AutoformerForPrediction) if is_torch_available() else ()
    all_generative_model_classes = (AutoformerForPrediction,) if is_torch_available() else ()
+    pipeline_model_mapping = {"feature-extraction": AutoformerModel} if is_torch_available() else {}
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
--- a/tests/models/encodec/test_modeling_encodec.py
+++ b/tests/models/encodec/test_modeling_encodec.py
@ -117,7 +117,7 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
    test_pruning = False
    test_headmasking = False
    test_resize_embeddings = False
-    pipeline_model_mapping = {}
+    pipeline_model_mapping = {"feature-extraction": EncodecModel} if is_torch_available() else {}
    input_name = "input_values"

    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/git/test_modeling_git.py
+++ b/tests/models/git/test_modeling_git.py
@ -383,11 +383,22 @@ class GitModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
    all_model_classes = (GitModel, GitForCausalLM) if is_torch_available() else ()
    all_generative_model_classes = (GitForCausalLM,) if is_torch_available() else ()
    pipeline_model_mapping = (
-        {"feature-extraction": GitModel, "text-generation": GitForCausalLM} if is_torch_available() else {}
+        {"feature-extraction": GitModel, "image-to-text": GitForCausalLM, "text-generation": GitForCausalLM}
+        if is_torch_available()
+        else {}
    )
    fx_compatible = False
    test_torchscript = False

+    # `GitForCausalLM` doesn't fit into image-to-text pipeline. We might need to overwrite its `generate` function.
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name == "ImageToTextPipelineTests":
+            return True
+
+        return False
+
    # special case for GitForCausalLM model
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@ -270,10 +270,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        else ()
    )
    pipeline_model_mapping = (
-        {
-            "document-question-answering": LayoutLMv2ForQuestionAnswering,
-            "feature-extraction": LayoutLMv2Model,
-        }
+        {"document-question-answering": LayoutLMv2ForQuestionAnswering, "feature-extraction": LayoutLMv2Model}
        if is_torch_available()
        else {}
    )
--- a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
@ -286,10 +286,7 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        else ()
    )
    pipeline_model_mapping = (
-        {
-            "document-question-answering": LayoutLMv3ForQuestionAnswering,
-            "feature-extraction": LayoutLMv3Model,
-        }
+        {"document-question-answering": LayoutLMv3ForQuestionAnswering, "feature-extraction": LayoutLMv3Model}
        if is_torch_available()
        else {}
    )
--- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
@ -278,13 +278,7 @@ class TFLayoutLMv3ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
        else ()
    )
    pipeline_model_mapping = (
-        {
-            "feature-extraction": TFLayoutLMv3Model,
-            "question-answering": TFLayoutLMv3ForQuestionAnswering,
-            "text-classification": TFLayoutLMv3ForSequenceClassification,
-            "token-classification": TFLayoutLMv3ForTokenClassification,
-            "zero-shot": TFLayoutLMv3ForSequenceClassification,
-        }
+        {"document-question-answering": TFLayoutLMv3ForQuestionAnswering, "feature-extraction": TFLayoutLMv3Model}
        if is_tf_available()
        else {}
    )
--- a/tests/models/timm_backbone/test_modeling_timm_backbone.py
+++ b/tests/models/timm_backbone/test_modeling_timm_backbone.py
@ -32,6 +32,8 @@ if is_torch_available():

    from transformers import TimmBackbone, TimmBackboneConfig

+from ...test_pipeline_mixin import PipelineTesterMixin
+

 class TimmBackboneModelTester:
    def __init__(
@ -95,8 +97,9 @@ class TimmBackboneModelTester:

@require_torch
@require_timm
-class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, unittest.TestCase):
+class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (TimmBackbone,) if is_torch_available() else ()
+    pipeline_model_mapping = {"feature-extraction": TimmBackbone} if is_torch_available() else {}
    test_resize_embeddings = False
    test_head_masking = False
    test_pruning = False
--- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
@ -322,7 +322,7 @@ class TFWav2Vec2ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
        (TFWav2Vec2Model, TFWav2Vec2ForCTC, TFWav2Vec2ForSequenceClassification) if is_tf_available() else ()
    )
    pipeline_model_mapping = (
-        {"feature-extraction": TFWav2Vec2Model, "audio-classification": TFWav2Vec2ForSequenceClassification}
+        {"audio-classification": TFWav2Vec2ForSequenceClassification, "feature-extraction": TFWav2Vec2Model}
        if is_tf_available()
        else {}
    )
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@ -16,7 +16,7 @@ import unittest

 import numpy as np

-from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
 from transformers.pipelines import AudioClassificationPipeline, pipeline
 from transformers.testing_utils import (
    is_pipeline_test,
@ -31,9 +31,9 @@ from .test_pipelines_common import ANY


@is_pipeline_test
-@require_torch
 class AudioClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING

    def get_test_pipeline(self, model, tokenizer, processor):
        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
--- a/tests/pipelines/test_pipelines_mask_generation.py
+++ b/tests/pipelines/test_pipelines_mask_generation.py
@ -18,7 +18,12 @@ from typing import Dict

 import numpy as np

-from transformers import MODEL_FOR_MASK_GENERATION_MAPPING, is_vision_available, pipeline
+from transformers import (
+    MODEL_FOR_MASK_GENERATION_MAPPING,
+    TF_MODEL_FOR_MASK_GENERATION_MAPPING,
+    is_vision_available,
+    pipeline,
+)
 from transformers.pipelines import MaskGenerationPipeline
 from transformers.testing_utils import (
    is_pipeline_test,
@ -58,6 +63,9 @@ class MaskGenerationPipelineTests(unittest.TestCase):
    model_mapping = dict(
        (list(MODEL_FOR_MASK_GENERATION_MAPPING.items()) if MODEL_FOR_MASK_GENERATION_MAPPING else [])
    )
+    tf_model_mapping = dict(
+        (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else [])
+    )

    def get_test_pipeline(self, model, tokenizer, processor):
        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor)
@ -66,7 +74,7 @@ class MaskGenerationPipelineTests(unittest.TestCase):
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
        ]

-    # TODO: Fix me @Arthur
+    # TODO: Implement me @Arthur
    def run_pipeline_test(self, mask_generator, examples):
        pass

--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@ -17,6 +17,7 @@ import copy
 import json
 import os
 import random
+import unittest
 from pathlib import Path

 from transformers.testing_utils import (
@ -314,7 +315,6 @@ class PipelineTesterMixin:
        run_batch_test(pipeline, examples)

    @is_pipeline_test
-    @require_torch
    def test_pipeline_audio_classification(self):
        self.run_task_tests(task="audio-classification")

@ -366,6 +366,7 @@ class PipelineTesterMixin:
    def test_pipeline_image_to_text(self):
        self.run_task_tests(task="image-to-text")

+    @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
    @is_pipeline_test
    @require_vision
    @require_torch
--- a/tests/utils/tiny_model_summary.json
+++ b/tests/utils/tiny_model_summary.json
@ -1597,7 +1597,8 @@
            "EfficientFormerImageProcessor"
        ],
        "model_classes": [
-            "EfficientFormerForImageClassification"
+            "EfficientFormerForImageClassification",
+            "TFEfficientFormerForImageClassification"
        ],
        "sha": "ebadb628e12f268e321fcc756fa4606f7b5b3178"
    },
@ -1607,7 +1608,8 @@
            "EfficientFormerImageProcessor"
        ],
        "model_classes": [
-            "EfficientFormerForImageClassificationWithTeacher"
+            "EfficientFormerForImageClassificationWithTeacher",
+            "TFEfficientFormerForImageClassificationWithTeacher"
        ],
        "sha": "1beabce6da9cb4ebbeafcd1ef23fac36b4a269e2"
    },
@ -1617,7 +1619,8 @@
            "EfficientFormerImageProcessor"
        ],
        "model_classes": [
-            "EfficientFormerModel"
+            "EfficientFormerModel",
+            "TFEfficientFormerModel"
        ],
        "sha": "200fae5b875844d09c8a91d1c155b72b06a517f6"
    },
@ -1736,6 +1739,16 @@
        ],
        "sha": "312b532cbef26610d80f2bd008650160cae4f7a1"
    },
+    "EncodecModel": {
+        "tokenizer_classes": [],
+        "processor_classes": [
+            "EncodecFeatureExtractor"
+        ],
+        "model_classes": [
+            "EncodecModel"
+        ],
+        "sha": "e14c5a2fd6529c85cd4ac5a05ee9e550ced6a006"
+    },
    "EncoderDecoderModel": {
        "tokenizer_classes": [
            "BertTokenizer",
@ -3888,6 +3901,36 @@
        ],
        "sha": "b3a1452e7cb44b600b21ee14f3d5382366855a46"
    },
+    "MobileViTV2ForImageClassification": {
+        "tokenizer_classes": [],
+        "processor_classes": [
+            "MobileViTImageProcessor"
+        ],
+        "model_classes": [
+            "MobileViTV2ForImageClassification"
+        ],
+        "sha": "25752b0967ad594341d1b685401450d7f698433c"
+    },
+    "MobileViTV2ForSemanticSegmentation": {
+        "tokenizer_classes": [],
+        "processor_classes": [
+            "MobileViTImageProcessor"
+        ],
+        "model_classes": [
+            "MobileViTV2ForSemanticSegmentation"
+        ],
+        "sha": "13b953f50be33219d55a12f1098be38b88000897"
+    },
+    "MobileViTV2Model": {
+        "tokenizer_classes": [],
+        "processor_classes": [
+            "MobileViTImageProcessor"
+        ],
+        "model_classes": [
+            "MobileViTV2Model"
+        ],
+        "sha": "2f46357659db2d6d54d870e28073deeea1c8cb64"
+    },
    "MvpForCausalLM": {
        "tokenizer_classes": [
            "MvpTokenizer",
@ -4452,6 +4495,16 @@
        ],
        "sha": "83ec4d2d61ed62525ee033e13d144817beb29d19"
    },
+    "Pix2StructForConditionalGeneration": {
+        "tokenizer_classes": [
+            "T5TokenizerFast"
+        ],
+        "processor_classes": [
+            "Pix2StructImageProcessor"
+        ],
+        "model_classes": [],
+        "sha": "42b3de00ad535076c4893e4ac5ae2d2748cc4ccb"
+    },
    "PoolFormerForImageClassification": {
        "tokenizer_classes": [],
        "processor_classes": [
@ -5123,7 +5176,8 @@
            "SamImageProcessor"
        ],
        "model_classes": [
-            "SamModel"
+            "SamModel",
+            "TFSamModel"
        ],
        "sha": "eca8651bc84e5ac3b1b62e784b744a6bd1b82575"
    },