diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index f4e53099b19..1254761a2aa 100644
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -2965,11 +2965,13 @@ else:
     )
     _import_structure["models.auto"].extend(
         [
+            "TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
             "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
             "TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
             "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
             "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
             "TF_MODEL_FOR_MASKED_LM_MAPPING",
+            "TF_MODEL_FOR_MASK_GENERATION_MAPPING",
             "TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
             "TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING",
             "TF_MODEL_FOR_PRETRAINING_MAPPING",
@@ -6350,9 +6352,11 @@ if TYPE_CHECKING:
         TFAlbertPreTrainedModel,
     )
     from .models.auto import (
+        TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
         TF_MODEL_FOR_CAUSAL_LM_MAPPING,
         TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
         TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+        TF_MODEL_FOR_MASK_GENERATION_MAPPING,
         TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
         TF_MODEL_FOR_MASKED_LM_MAPPING,
         TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py
index 7ea870a9331..5af79da56f7 100644
--- a/src/transformers/models/auto/__init__.py
+++ b/src/transformers/models/auto/__init__.py
@@ -114,8 +114,10 @@ except OptionalDependencyNotAvailable:
     pass
 else:
     _import_structure["modeling_tf_auto"] = [
+        "TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
         "TF_MODEL_FOR_CAUSAL_LM_MAPPING",
         "TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
+        "TF_MODEL_FOR_MASK_GENERATION_MAPPING",
         "TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
         "TF_MODEL_FOR_MASKED_LM_MAPPING",
         "TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
@@ -279,9 +281,11 @@ if TYPE_CHECKING:
     pass
 else:
     from .modeling_tf_auto import (
+        TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
         TF_MODEL_FOR_CAUSAL_LM_MAPPING,
         TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
         TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+        TF_MODEL_FOR_MASK_GENERATION_MAPPING,
         TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
         TF_MODEL_FOR_MASKED_LM_MAPPING,
         TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
diff --git a/src/transformers/utils/dummy_tf_objects.py b/src/transformers/utils/dummy_tf_objects.py
index 4a189174eee..4da32ae6034 100644
--- a/src/transformers/utils/dummy_tf_objects.py
+++ b/src/transformers/utils/dummy_tf_objects.py
@@ -216,6 +216,9 @@ class TFAlbertPreTrainedModel(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
+TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = None
+
+
 TF_MODEL_FOR_CAUSAL_LM_MAPPING = None
 
 
@@ -225,6 +228,9 @@ TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None
 TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
 
 
+TF_MODEL_FOR_MASK_GENERATION_MAPPING = None
+
+
 TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = None
 
 
diff --git a/tests/models/autoformer/test_modeling_autoformer.py b/tests/models/autoformer/test_modeling_autoformer.py
index 9df5bf236e0..9f0434689c4 100644
--- a/tests/models/autoformer/test_modeling_autoformer.py
+++ b/tests/models/autoformer/test_modeling_autoformer.py
@@ -25,6 +25,7 @@ from transformers.testing_utils import require_torch, slow, torch_device
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
+from ...test_pipeline_mixin import PipelineTesterMixin
 
 
 TOLERANCE = 1e-4
@@ -201,9 +202,10 @@ class AutoformerModelTester:
 
 
 @require_torch
-class AutoformerModelTest(ModelTesterMixin, unittest.TestCase):
+class AutoformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (AutoformerModel, AutoformerForPrediction) if is_torch_available() else ()
     all_generative_model_classes = (AutoformerForPrediction,) if is_torch_available() else ()
+    pipeline_model_mapping = {"feature-extraction": AutoformerModel} if is_torch_available() else {}
     test_pruning = False
     test_head_masking = False
     test_missing_keys = False
diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py
index 398da6f5d09..a1693b75824 100644
--- a/tests/models/encodec/test_modeling_encodec.py
+++ b/tests/models/encodec/test_modeling_encodec.py
@@ -117,7 +117,7 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     test_pruning = False
     test_headmasking = False
     test_resize_embeddings = False
-    pipeline_model_mapping = {}
+    pipeline_model_mapping = {"feature-extraction": EncodecModel} if is_torch_available() else {}
     input_name = "input_values"
 
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
diff --git a/tests/models/git/test_modeling_git.py b/tests/models/git/test_modeling_git.py
index b6384ae15f9..45b4457fdcc 100644
--- a/tests/models/git/test_modeling_git.py
+++ b/tests/models/git/test_modeling_git.py
@@ -383,11 +383,22 @@ class GitModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
     all_model_classes = (GitModel, GitForCausalLM) if is_torch_available() else ()
     all_generative_model_classes = (GitForCausalLM,) if is_torch_available() else ()
     pipeline_model_mapping = (
-        {"feature-extraction": GitModel, "text-generation": GitForCausalLM} if is_torch_available() else {}
+        {"feature-extraction": GitModel, "image-to-text": GitForCausalLM, "text-generation": GitForCausalLM}
+        if is_torch_available()
+        else {}
     )
     fx_compatible = False
     test_torchscript = False
 
+    # `GitForCausalLM` doesn't fit into the image-to-text pipeline. We might need to override its `generate` function.
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name == "ImageToTextPipelineTests":
+            return True
+
+        return False
+
     # special case for GitForCausalLM model
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
index c2b7fe34ee7..7d2f35c8b94 100644
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -270,10 +270,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
         else ()
     )
     pipeline_model_mapping = (
-        {
-            "document-question-answering": LayoutLMv2ForQuestionAnswering,
-            "feature-extraction": LayoutLMv2Model,
-        }
+        {"document-question-answering": LayoutLMv2ForQuestionAnswering, "feature-extraction": LayoutLMv2Model}
         if is_torch_available()
         else {}
     )
diff --git a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
index c6a2a1bf37f..c458024f105 100644
--- a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
@@ -286,10 +286,7 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
         else ()
     )
     pipeline_model_mapping = (
-        {
-            "document-question-answering": LayoutLMv3ForQuestionAnswering,
-            "feature-extraction": LayoutLMv3Model,
-        }
+        {"document-question-answering": LayoutLMv3ForQuestionAnswering, "feature-extraction": LayoutLMv3Model}
         if is_torch_available()
         else {}
     )
diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
index a1e2cd59083..1bdb3e2648d 100644
--- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
@@ -278,13 +278,7 @@ class TFLayoutLMv3ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
         else ()
     )
     pipeline_model_mapping = (
-        {
-            "feature-extraction": TFLayoutLMv3Model,
-            "question-answering": TFLayoutLMv3ForQuestionAnswering,
-            "text-classification": TFLayoutLMv3ForSequenceClassification,
-            "token-classification": TFLayoutLMv3ForTokenClassification,
-            "zero-shot": TFLayoutLMv3ForSequenceClassification,
-        }
+        {"document-question-answering": TFLayoutLMv3ForQuestionAnswering, "feature-extraction": TFLayoutLMv3Model}
         if is_tf_available()
         else {}
     )
diff --git a/tests/models/timm_backbone/test_modeling_timm_backbone.py b/tests/models/timm_backbone/test_modeling_timm_backbone.py
index f58716e0f2f..145238c6bfd 100644
--- a/tests/models/timm_backbone/test_modeling_timm_backbone.py
+++ b/tests/models/timm_backbone/test_modeling_timm_backbone.py
@@ -32,6 +32,8 @@ if is_torch_available():
     from transformers import TimmBackbone, TimmBackboneConfig
 
 
+from ...test_pipeline_mixin import PipelineTesterMixin
+
 
 class TimmBackboneModelTester:
     def __init__(
@@ -95,8 +97,9 @@ class TimmBackboneModelTester:
 
 @require_torch
 @require_timm
-class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, unittest.TestCase):
+class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (TimmBackbone,) if is_torch_available() else ()
+    pipeline_model_mapping = {"feature-extraction": TimmBackbone} if is_torch_available() else {}
     test_resize_embeddings = False
     test_head_masking = False
     test_pruning = False
diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
index 391d8e8ce1f..3554d18957c 100644
--- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
@@ -322,7 +322,7 @@ class TFWav2Vec2ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
         (TFWav2Vec2Model, TFWav2Vec2ForCTC, TFWav2Vec2ForSequenceClassification) if is_tf_available() else ()
     )
     pipeline_model_mapping = (
-        {"feature-extraction": TFWav2Vec2Model, "audio-classification": TFWav2Vec2ForSequenceClassification}
+        {"audio-classification": TFWav2Vec2ForSequenceClassification, "feature-extraction": TFWav2Vec2Model}
         if is_tf_available()
         else {}
     )
diff --git a/tests/pipelines/test_pipelines_audio_classification.py b/tests/pipelines/test_pipelines_audio_classification.py
index 208690396c4..8f2e46e0a50 100644
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -16,7 +16,7 @@ import unittest
 
 import numpy as np
 
-from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING, TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
 from transformers.pipelines import AudioClassificationPipeline, pipeline
 from transformers.testing_utils import (
     is_pipeline_test,
@@ -31,9 +31,9 @@ from .test_pipelines_common import ANY
 
 
 @is_pipeline_test
-@require_torch
 class AudioClassificationPipelineTests(unittest.TestCase):
     model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
 
     def get_test_pipeline(self, model, tokenizer, processor):
         audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
diff --git a/tests/pipelines/test_pipelines_mask_generation.py b/tests/pipelines/test_pipelines_mask_generation.py
index 53775deda28..cf170390651 100644
--- a/tests/pipelines/test_pipelines_mask_generation.py
+++ b/tests/pipelines/test_pipelines_mask_generation.py
@@ -18,7 +18,12 @@ from typing import Dict
 
 import numpy as np
 
-from transformers import MODEL_FOR_MASK_GENERATION_MAPPING, is_vision_available, pipeline
+from transformers import (
+    MODEL_FOR_MASK_GENERATION_MAPPING,
+    TF_MODEL_FOR_MASK_GENERATION_MAPPING,
+    is_vision_available,
+    pipeline,
+)
 from transformers.pipelines import MaskGenerationPipeline
 from transformers.testing_utils import (
     is_pipeline_test,
@@ -58,6 +63,9 @@ class MaskGenerationPipelineTests(unittest.TestCase):
     model_mapping = dict(
         (list(MODEL_FOR_MASK_GENERATION_MAPPING.items()) if MODEL_FOR_MASK_GENERATION_MAPPING else [])
     )
+    tf_model_mapping = dict(
+        (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else [])
+    )
 
     def get_test_pipeline(self, model, tokenizer, processor):
         image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor)
@@ -66,7 +74,7 @@ class MaskGenerationPipelineTests(unittest.TestCase):
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
         ]
 
-    # TODO: Fix me @Arthur
+    # TODO: Implement me @Arthur
     def run_pipeline_test(self, mask_generator, examples):
         pass
diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py
index 05ea27121a5..1fa8f378e40 100644
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -17,6 +17,7 @@ import copy
 import json
 import os
 import random
+import unittest
from pathlib import Path from transformers.testing_utils import ( @@ -314,7 +315,6 @@ class PipelineTesterMixin: run_batch_test(pipeline, examples) @is_pipeline_test - @require_torch def test_pipeline_audio_classification(self): self.run_task_tests(task="audio-classification") @@ -366,6 +366,7 @@ class PipelineTesterMixin: def test_pipeline_image_to_text(self): self.run_task_tests(task="image-to-text") + @unittest.skip(reason="`run_pipeline_test` is currently not implemented.") @is_pipeline_test @require_vision @require_torch diff --git a/tests/utils/tiny_model_summary.json b/tests/utils/tiny_model_summary.json index a186bf17e19..2d0575612d6 100644 --- a/tests/utils/tiny_model_summary.json +++ b/tests/utils/tiny_model_summary.json @@ -1597,7 +1597,8 @@ "EfficientFormerImageProcessor" ], "model_classes": [ - "EfficientFormerForImageClassification" + "EfficientFormerForImageClassification", + "TFEfficientFormerForImageClassification" ], "sha": "ebadb628e12f268e321fcc756fa4606f7b5b3178" }, @@ -1607,7 +1608,8 @@ "EfficientFormerImageProcessor" ], "model_classes": [ - "EfficientFormerForImageClassificationWithTeacher" + "EfficientFormerForImageClassificationWithTeacher", + "TFEfficientFormerForImageClassificationWithTeacher" ], "sha": "1beabce6da9cb4ebbeafcd1ef23fac36b4a269e2" }, @@ -1617,7 +1619,8 @@ "EfficientFormerImageProcessor" ], "model_classes": [ - "EfficientFormerModel" + "EfficientFormerModel", + "TFEfficientFormerModel" ], "sha": "200fae5b875844d09c8a91d1c155b72b06a517f6" }, @@ -1736,6 +1739,16 @@ ], "sha": "312b532cbef26610d80f2bd008650160cae4f7a1" }, + "EncodecModel": { + "tokenizer_classes": [], + "processor_classes": [ + "EncodecFeatureExtractor" + ], + "model_classes": [ + "EncodecModel" + ], + "sha": "e14c5a2fd6529c85cd4ac5a05ee9e550ced6a006" + }, "EncoderDecoderModel": { "tokenizer_classes": [ "BertTokenizer", @@ -3888,6 +3901,36 @@ ], "sha": "b3a1452e7cb44b600b21ee14f3d5382366855a46" }, + "MobileViTV2ForImageClassification": { + "tokenizer_classes": [], + "processor_classes": [ + "MobileViTImageProcessor" + ], + "model_classes": [ + "MobileViTV2ForImageClassification" + ], + "sha": "25752b0967ad594341d1b685401450d7f698433c" + }, + "MobileViTV2ForSemanticSegmentation": { + "tokenizer_classes": [], + "processor_classes": [ + "MobileViTImageProcessor" + ], + "model_classes": [ + "MobileViTV2ForSemanticSegmentation" + ], + "sha": "13b953f50be33219d55a12f1098be38b88000897" + }, + "MobileViTV2Model": { + "tokenizer_classes": [], + "processor_classes": [ + "MobileViTImageProcessor" + ], + "model_classes": [ + "MobileViTV2Model" + ], + "sha": "2f46357659db2d6d54d870e28073deeea1c8cb64" + }, "MvpForCausalLM": { "tokenizer_classes": [ "MvpTokenizer", @@ -4452,6 +4495,16 @@ ], "sha": "83ec4d2d61ed62525ee033e13d144817beb29d19" }, + "Pix2StructForConditionalGeneration": { + "tokenizer_classes": [ + "T5TokenizerFast" + ], + "processor_classes": [ + "Pix2StructImageProcessor" + ], + "model_classes": [], + "sha": "42b3de00ad535076c4893e4ac5ae2d2748cc4ccb" + }, "PoolFormerForImageClassification": { "tokenizer_classes": [], "processor_classes": [ @@ -5123,7 +5176,8 @@ "SamImageProcessor" ], "model_classes": [ - "SamModel" + "SamModel", + "TFSamModel" ], "sha": "eca8651bc84e5ac3b1b62e784b744a6bd1b82575" },
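
Note: the test changes above all hinge on two hooks from `PipelineTesterMixin`. The Python sketch below is illustrative only, not the actual implementation in tests/test_pipeline_mixin.py (which also resolves tiny checkpoints, tokenizers, and processors, and derives the test-case name differently); `DummyModel`, `ExampleModelTest`, and the name-derivation logic are hypothetical. It shows how `pipeline_model_mapping` feeds the per-task pipeline tests and how `is_pipeline_test_to_skip` lets a model test opt out, as `GitModelTest` does for image-to-text:

import unittest


class PipelineTesterMixin:
    # Task name -> model class, e.g. {"feature-extraction": AutoformerModel}.
    pipeline_model_mapping = {}

    def run_task_tests(self, task):
        # No mapping entry for this task means the pipeline test is skipped.
        if task not in self.pipeline_model_mapping:
            self.skipTest(f"no model registered for task {task!r}")
        model_class = self.pipeline_model_mapping[task]
        # Hypothetical name derivation: "image-to-text" -> "ImageToTextPipelineTests".
        test_case_name = "".join(part.title() for part in task.split("-")) + "PipelineTests"
        if self.is_pipeline_test_to_skip(test_case_name, None, model_class, None, None):
            self.skipTest(f"{test_case_name} disabled for {model_class.__name__}")
        # ... the real mixin now builds a pipeline from a tiny model and runs it ...

    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        # Default: run everything. Model test classes (e.g. GitModelTest) override this hook.
        return False


class DummyModel:  # stand-in for a real model class such as AutoformerModel
    pass


class ExampleModelTest(PipelineTesterMixin, unittest.TestCase):
    pipeline_model_mapping = {"feature-extraction": DummyModel}

    def test_pipeline_feature_extraction(self):
        self.run_task_tests(task="feature-extraction")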