mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)

Make more test models smaller (#25005)

* Make more test models tiny
* Make more test models tiny
* More models
* More models

This commit is contained in:
parent 8f1f0bf50f
commit 42571f6eb8
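The pattern repeated across the hunks below is the same for every model: the tester class gets tiny default hyperparameters and passes them into its config, so that the shared test_model_is_small check in ModelTesterMixin no longer has to be skipped. The following is a minimal sketch of that pattern, with illustrative names only (TinyModelTester is not a real class in the diff, and the real testers return their model-specific config such as CTRLConfig or DetaConfig rather than a bare PretrainedConfig):

# Illustrative sketch only; not part of the diff below.
from transformers import PretrainedConfig


class TinyModelTester:
    def __init__(self, parent, vocab_size=99, hidden_size=32, num_hidden_layers=2, num_attention_heads=4):
        # Tiny defaults keep the randomly initialised test model far below the common-test size budget.
        self.parent = parent
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

    def get_config(self):
        # PretrainedConfig stores arbitrary keyword arguments as attributes, which is enough
        # to illustrate how the tiny values flow from the tester into the config.
        return PretrainedConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
        )

Only the dimensions shrink; the set of tests and code paths exercised stays the same.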
@@ -133,7 +133,7 @@ class CTRLModelTester:
             n_embd=self.hidden_size,
             n_layer=self.num_hidden_layers,
             n_head=self.num_attention_heads,
-            # intermediate_size=self.intermediate_size,
+            dff=self.intermediate_size,
             # hidden_act=self.hidden_act,
             # hidden_dropout_prob=self.hidden_dropout_prob,
             # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
@@ -243,10 +243,6 @@ class CTRLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_lm_head_model(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

@@ -95,7 +95,7 @@ class TFCTRLModelTester(object):
             n_embd=self.hidden_size,
             n_layer=self.num_hidden_layers,
             n_head=self.num_attention_heads,
-            # intermediate_size=self.intermediate_size,
+            dff=self.intermediate_size,
             # hidden_act=self.hidden_act,
             # hidden_dropout_prob=self.hidden_dropout_prob,
             # attention_probs_dropout_prob=self.attention_probs_dropout_prob,

@@ -55,8 +55,8 @@ class CvtModelTester:
         batch_size=13,
         image_size=64,
         num_channels=3,
-        embed_dim=[16, 48, 96],
-        num_heads=[1, 3, 6],
+        embed_dim=[16, 32, 48],
+        num_heads=[1, 2, 3],
         depth=[1, 2, 10],
         patch_sizes=[7, 3, 3],
         patch_stride=[4, 2, 2],
@@ -247,10 +247,6 @@ class CvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in CVT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

@@ -45,8 +45,8 @@ class TFCvtModelTester:
         batch_size=13,
         image_size=64,
         num_channels=3,
-        embed_dim=[16, 48, 96],
-        num_heads=[1, 3, 6],
+        embed_dim=[16, 32, 48],
+        num_heads=[1, 2, 3],
         depth=[1, 2, 10],
         patch_sizes=[7, 3, 3],
         patch_stride=[4, 2, 2],

@@ -19,7 +19,7 @@ import inspect
 import math
 import unittest

-from transformers import DetaConfig, is_torch_available, is_torchvision_available, is_vision_available
+from transformers import DetaConfig, ResNetConfig, is_torch_available, is_torchvision_available, is_vision_available
 from transformers.file_utils import cached_property
 from transformers.testing_utils import require_torchvision, require_vision, slow, torch_device

@@ -49,7 +49,7 @@ class DetaModelTester:
         batch_size=8,
         is_training=True,
         use_labels=True,
-        hidden_size=256,
+        hidden_size=32,
         num_hidden_layers=2,
         num_attention_heads=8,
         intermediate_size=4,
@@ -118,6 +118,16 @@ class DetaModelTester:
         return config, pixel_values, pixel_mask, labels

     def get_config(self):
+        resnet_config = ResNetConfig(
+            num_channels=3,
+            embeddings_size=10,
+            hidden_sizes=[10, 20, 30, 40],
+            depths=[1, 1, 2, 1],
+            hidden_act="relu",
+            num_labels=3,
+            out_features=["stage2", "stage3", "stage4"],
+            out_indices=[2, 3, 4],
+        )
         return DetaConfig(
             d_model=self.hidden_size,
             encoder_layers=self.num_hidden_layers,
@@ -134,6 +144,7 @@ class DetaModelTester:
             encoder_n_points=self.encoder_n_points,
             decoder_n_points=self.decoder_n_points,
             two_stage=self.two_stage,
+            backbone_config=resnet_config,
         )

     def prepare_config_and_inputs_for_common(self):
@@ -423,10 +434,6 @@ class DetaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
     def test_tied_model_weights_key_ignore(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_initialization(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

@@ -62,6 +62,7 @@ class DPTModelTester:
         attention_probs_dropout_prob=0.1,
         initializer_range=0.02,
         num_labels=3,
+        neck_hidden_sizes=[16, 16, 32, 32],
         is_hybrid=False,
         scope=None,
     ):
@@ -84,6 +85,7 @@ class DPTModelTester:
         self.num_labels = num_labels
         self.scope = scope
         self.is_hybrid = is_hybrid
+        self.neck_hidden_sizes = neck_hidden_sizes
         # sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
         self.seq_length = num_patches + 1
@@ -105,6 +107,7 @@ class DPTModelTester:
             patch_size=self.patch_size,
             num_channels=self.num_channels,
             hidden_size=self.hidden_size,
+            fusion_hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
             backbone_out_indices=self.backbone_out_indices,
             num_attention_heads=self.num_attention_heads,
@@ -115,6 +118,7 @@ class DPTModelTester:
             is_decoder=False,
             initializer_range=self.initializer_range,
             is_hybrid=self.is_hybrid,
+            neck_hidden_sizes=self.neck_hidden_sizes,
         )

     def create_and_check_model(self, config, pixel_values, labels):
@@ -275,10 +279,6 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in DPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

@@ -62,7 +62,8 @@ class DPTModelTester:
         attention_probs_dropout_prob=0.1,
         initializer_range=0.02,
         num_labels=3,
-        backbone_featmap_shape=[1, 384, 24, 24],
+        backbone_featmap_shape=[1, 32, 24, 24],
+        neck_hidden_sizes=[16, 16, 32, 32],
         is_hybrid=True,
         scope=None,
     ):
@@ -86,6 +87,7 @@ class DPTModelTester:
         self.backbone_featmap_shape = backbone_featmap_shape
         self.scope = scope
         self.is_hybrid = is_hybrid
+        self.neck_hidden_sizes = neck_hidden_sizes
         # sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
         self.seq_length = num_patches + 1
@@ -108,7 +110,7 @@ class DPTModelTester:
             "depths": [3, 4, 9],
             "out_features": ["stage1", "stage2", "stage3"],
             "embedding_dynamic_padding": True,
-            "hidden_sizes": [96, 192, 384, 768],
+            "hidden_sizes": [16, 16, 32, 32],
             "num_groups": 2,
         }

@@ -117,6 +119,7 @@ class DPTModelTester:
             patch_size=self.patch_size,
             num_channels=self.num_channels,
             hidden_size=self.hidden_size,
+            fusion_hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
             backbone_out_indices=self.backbone_out_indices,
             num_attention_heads=self.num_attention_heads,
@@ -129,6 +132,7 @@ class DPTModelTester:
             is_hybrid=self.is_hybrid,
             backbone_config=backbone_config,
             backbone_featmap_shape=self.backbone_featmap_shape,
+            neck_hidden_sizes=self.neck_hidden_sizes,
         )

     def create_and_check_model(self, config, pixel_values, labels):
@@ -289,10 +293,6 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in DPT_PRETRAINED_MODEL_ARCHIVE_LIST[1:]:

@@ -49,7 +49,7 @@ class EfficientNetModelTester:
         num_channels=3,
         kernel_sizes=[3, 3, 5],
         in_channels=[32, 16, 24],
-        out_channels=[16, 24, 40],
+        out_channels=[16, 24, 20],
         strides=[1, 1, 2],
         num_block_repeats=[1, 1, 2],
         expand_ratios=[1, 6, 6],
@@ -223,10 +223,6 @@ class EfficientNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in EFFICIENTNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

@@ -77,16 +77,25 @@ class EncodecModelTester:
         batch_size=12,
         num_channels=2,
         is_training=False,
         num_hidden_layers=4,
         intermediate_size=40,
+        hidden_size=32,
+        num_filters=8,
+        num_residual_layers=1,
+        upsampling_ratios=[8, 4],
+        num_lstm_layers=1,
+        codebook_size=64,
     ):
         self.parent = parent
         self.batch_size = batch_size
         self.num_channels = num_channels
         self.is_training = is_training

         self.num_hidden_layers = num_hidden_layers
         self.intermediate_size = intermediate_size
+        self.hidden_size = hidden_size
+        self.num_filters = num_filters
+        self.num_residual_layers = num_residual_layers
+        self.upsampling_ratios = upsampling_ratios
+        self.num_lstm_layers = num_lstm_layers
+        self.codebook_size = codebook_size

     def prepare_config_and_inputs(self):
         input_values = floats_tensor([self.batch_size, self.num_channels, self.intermediate_size], scale=1.0)
@@ -99,7 +108,16 @@ class EncodecModelTester:
         return config, inputs_dict

     def get_config(self):
-        return EncodecConfig(audio_channels=self.num_channels, chunk_in_sec=None)
+        return EncodecConfig(
+            audio_channels=self.num_channels,
+            chunk_in_sec=None,
+            hidden_size=self.hidden_size,
+            num_filters=self.num_filters,
+            num_residual_layers=self.num_residual_layers,
+            upsampling_ratios=self.upsampling_ratios,
+            num_lstm_layers=self.num_lstm_layers,
+            codebook_size=self.codebook_size,
+        )

     def create_and_check_model_forward(self, config, inputs_dict):
         model = EncodecModel(config=config).to(torch_device).eval()
@@ -397,10 +415,6 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_identity_shortcut(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs()
         config.use_conv_shortcut = False

@@ -279,10 +279,6 @@ class EsmModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_resize_tokens_embeddings(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 class EsmModelIntegrationTest(TestCasePlus):

@@ -100,6 +100,28 @@ class EsmFoldModelTester:
         return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

     def get_config(self):
+        esmfold_config = {
+            "trunk": {
+                "num_blocks": 2,
+                "sequence_state_dim": 64,
+                "pairwise_state_dim": 16,
+                "sequence_head_width": 4,
+                "pairwise_head_width": 4,
+                "position_bins": 4,
+                "chunk_size": 16,
+                "structure_module": {
+                    "ipa_dim": 16,
+                    "num_angles": 7,
+                    "num_blocks": 2,
+                    "num_heads_ipa": 4,
+                    "pairwise_dim": 16,
+                    "resnet_dim": 16,
+                    "sequence_dim": 48,
+                },
+            },
+            "fp16_esm": False,
+            "lddt_head_hid_dim": 16,
+        }
         config = EsmConfig(
             vocab_size=33,
             hidden_size=self.hidden_size,
@@ -114,7 +136,7 @@ class EsmFoldModelTester:
             type_vocab_size=self.type_vocab_size,
             initializer_range=self.initializer_range,
             is_folding_model=True,
-            esmfold_config={"trunk": {"num_blocks": 2}, "fp16_esm": False},
+            esmfold_config=esmfold_config,
         )
         return config

@@ -126,8 +148,8 @@ class EsmFoldModelTester:
         result = model(input_ids)
         result = model(input_ids)

-        self.parent.assertEqual(result.positions.shape, (8, self.batch_size, self.seq_length, 14, 3))
-        self.parent.assertEqual(result.angles.shape, (8, self.batch_size, self.seq_length, 7, 2))
+        self.parent.assertEqual(result.positions.shape, (2, self.batch_size, self.seq_length, 14, 3))
+        self.parent.assertEqual(result.angles.shape, (2, self.batch_size, self.seq_length, 7, 2))

     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
@@ -243,10 +265,6 @@ class EsmFoldModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 class EsmModelIntegrationTest(TestCasePlus):

@@ -92,7 +92,7 @@ class FlavaImageModelTester:
         num_channels=3,
         qkv_bias=True,
         mask_token=True,
-        vocab_size=8192,
+        vocab_size=99,
     ):
         self.parent = parent
         self.batch_size = batch_size
@@ -321,10 +321,6 @@ class FlavaImageModelTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -341,7 +337,7 @@ class FlavaTextModelTester:
         is_training=True,
         use_input_mask=True,
         use_token_type_ids=True,
-        vocab_size=30522,
+        vocab_size=102,
         type_vocab_size=2,
         max_position_embeddings=512,
         position_embedding_type="absolute",
@@ -476,10 +472,6 @@ class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -632,10 +624,6 @@ class FlavaMultimodalModelTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -644,11 +632,23 @@ class FlavaMultimodalModelTest(ModelTesterMixin, unittest.TestCase):


 class FlavaImageCodebookTester:
-    def __init__(self, parent, batch_size=12, image_size=112, num_channels=3):
+    def __init__(
+        self,
+        parent,
+        batch_size=12,
+        image_size=112,
+        num_channels=3,
+        hidden_size=32,
+        num_groups=2,
+        vocab_size=99,
+    ):
         self.parent = parent
         self.batch_size = batch_size
         self.image_size = image_size
         self.num_channels = num_channels
+        self.hidden_size = hidden_size
+        self.num_groups = num_groups
+        self.vocab_size = vocab_size

     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -657,7 +657,9 @@ class FlavaImageCodebookTester:
         return config, pixel_values

     def get_config(self):
-        return FlavaImageCodebookConfig()
+        return FlavaImageCodebookConfig(
+            hidden_size=self.hidden_size, num_groups=self.num_groups, vocab_size=self.vocab_size
+        )

     def create_and_check_model(self, config, pixel_values):
         model = FlavaImageCodebook(config=config)
@@ -743,10 +745,6 @@ class FlavaImageCodebookTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_CODEBOOK_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -929,10 +927,6 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             return

@@ -203,7 +203,7 @@ class GitModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=4,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -268,6 +268,10 @@ class GitModelTester:
                 "num_channels": self.num_channels,
                 "image_size": self.image_size,
                 "patch_size": self.patch_size,
+                "hidden_size": self.hidden_size,
+                "projection_dim": 32,
+                "num_hidden_layers": self.num_hidden_layers,
+                "num_attention_heads": self.num_attention_heads,
             },
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
@@ -454,10 +458,6 @@ class GitModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
     def test_greedy_generate_dict_outputs_use_cache(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 @require_vision

@@ -38,7 +38,7 @@ class GPTSanJapaneseTester:
     def __init__(
         self,
         parent,
-        vocab_size=36000,
+        vocab_size=99,
         batch_size=13,
         num_contexts=7,
         # For common tests
@@ -182,10 +182,6 @@ class GPTSanJapaneseTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
     def test_model_parallelism(self):
         super().test_model_parallelism()

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 class GPTSanJapaneseForConditionalGenerationTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
@@ -216,10 +212,6 @@ class GPTSanJapaneseForConditionalGenerationTest(ModelTesterMixin, GenerationTes
     def test_model_parallelism(self):
         super().test_model_parallelism()

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_logits(self):
         model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")

@@ -42,22 +42,22 @@ class GraphormerModelTester:
         self,
         parent,
         num_classes=1,
-        num_atoms=512 * 9,
-        num_edges=512 * 3,
-        num_in_degree=512,
-        num_out_degree=512,
-        num_spatial=512,
-        num_edge_dis=128,
+        num_atoms=32 * 9,
+        num_edges=32 * 3,
+        num_in_degree=32,
+        num_out_degree=32,
+        num_spatial=32,
+        num_edge_dis=16,
         multi_hop_max_dist=5,  # sometimes is 20
-        spatial_pos_max=1024,
+        spatial_pos_max=32,
         edge_type="multi_hop",
         init_fn=None,
-        max_nodes=512,
+        max_nodes=32,
         share_input_output_embed=False,
-        num_hidden_layers=12,
-        embedding_dim=768,
-        ffn_embedding_dim=768,
-        num_attention_heads=32,
+        num_hidden_layers=2,
+        embedding_dim=32,
+        ffn_embedding_dim=32,
+        num_attention_heads=4,
         dropout=0.1,
         attention_dropout=0.1,
         activation_dropout=0.1,
@@ -470,10 +470,6 @@ class GraphormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_graph_classification(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in GRAPHORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

@@ -67,10 +67,10 @@ class LevitModelTester:
         stride=2,
         padding=1,
         patch_size=16,
-        hidden_sizes=[128, 256, 384],
-        num_attention_heads=[4, 6, 8],
+        hidden_sizes=[16, 32, 48],
+        num_attention_heads=[1, 2, 3],
         depths=[2, 3, 4],
-        key_dim=[16, 16, 16],
+        key_dim=[8, 8, 8],
         drop_path_rate=0,
         mlp_ratio=[2, 2, 2],
         attention_ratio=[2, 2, 2],
@@ -282,10 +282,6 @@ class LevitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

                 check_hidden_states_output(inputs_dict, config, model_class)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

@@ -54,6 +54,8 @@ class Mask2FormerModelTester:
         max_size=32 * 8,
         num_labels=4,
         hidden_dim=64,
+        num_attention_heads=4,
+        num_hidden_layers=2,
     ):
         self.parent = parent
         self.batch_size = batch_size
@@ -66,6 +68,8 @@ class Mask2FormerModelTester:
         self.num_labels = num_labels
         self.hidden_dim = hidden_dim
         self.mask_feature_size = hidden_dim
+        self.num_attention_heads = num_attention_heads
+        self.num_hidden_layers = num_hidden_layers

     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.min_size, self.max_size]).to(
@@ -85,15 +89,25 @@ class Mask2FormerModelTester:
     def get_config(self):
         config = Mask2FormerConfig(
             hidden_size=self.hidden_dim,
+            num_attention_heads=self.num_attention_heads,
+            num_hidden_layers=self.num_hidden_layers,
+            encoder_feedforward_dim=16,
+            dim_feedforward=32,
+            num_queries=self.num_queries,
+            num_labels=self.num_labels,
+            decoder_layers=2,
+            encoder_layers=2,
+            feature_size=16,
         )
         config.num_queries = self.num_queries
         config.num_labels = self.num_labels

         config.backbone_config.embed_dim = 16
         config.backbone_config.depths = [1, 1, 1, 1]
         config.backbone_config.hidden_size = 16
         config.backbone_config.num_channels = self.num_channels
         config.backbone_config.num_heads = [1, 1, 2, 2]

         config.encoder_feedforward_dim = 64
         config.dim_feedforward = 128
         config.hidden_dim = self.hidden_dim
         config.mask_feature_size = self.hidden_dim
         config.feature_size = self.hidden_dim
@@ -220,10 +234,6 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()

@@ -85,9 +85,15 @@ class MaskFormerModelTester:
         return MaskFormerConfig.from_backbone_and_decoder_configs(
             backbone_config=SwinConfig(
                 depths=[1, 1, 1, 1],
+                embed_dim=16,
+                hidden_size=32,
+                num_heads=[1, 1, 2, 2],
             ),
             decoder_config=DetrConfig(
-                decoder_ffn_dim=128,
+                decoder_ffn_dim=64,
+                decoder_layers=2,
+                encoder_ffn_dim=64,
+                encoder_layers=2,
                 num_queries=self.num_queries,
                 decoder_attention_heads=2,
                 d_model=self.mask_feature_size,
@@ -224,10 +230,6 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()

@@ -56,7 +56,7 @@ class MobileViTModelTester:
         image_size=32,
         patch_size=2,
         num_channels=3,
-        last_hidden_size=640,
+        last_hidden_size=32,
         num_attention_heads=4,
         hidden_act="silu",
         conv_kernel_size=3,
@@ -115,6 +115,8 @@ class MobileViTModelTester:
             attention_probs_dropout_prob=self.attention_probs_dropout_prob,
             classifier_dropout_prob=self.classifier_dropout_prob,
             initializer_range=self.initializer_range,
+            hidden_sizes=[12, 16, 20],
+            neck_hidden_sizes=[8, 8, 16, 16, 32, 32, 32],
         )

     def create_and_check_model(self, config, pixel_values, labels, pixel_labels):
@@ -231,10 +233,6 @@ class MobileViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
         expected_arg_names = ["pixel_values"]
         self.assertListEqual(arg_names[:1], expected_arg_names)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_model(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)

@@ -59,7 +59,7 @@ class TFMobileViTModelTester:
         image_size=32,
         patch_size=2,
         num_channels=3,
-        last_hidden_size=640,
+        last_hidden_size=32,
         num_attention_heads=4,
         hidden_act="silu",
         conv_kernel_size=3,
@@ -118,6 +118,8 @@ class TFMobileViTModelTester:
             attention_probs_dropout_prob=self.attention_probs_dropout_prob,
             classifier_dropout_prob=self.classifier_dropout_prob,
             initializer_range=self.initializer_range,
+            hidden_sizes=[12, 16, 20],
+            neck_hidden_sizes=[8, 8, 16, 16, 32, 32, 32],
        )

     def create_and_check_model(self, config, pixel_values, labels, pixel_labels):

@@ -115,6 +115,9 @@ class MobileViTV2ModelTester:
             width_multiplier=self.width_multiplier,
             ffn_dropout=self.ffn_dropout_prob,
             attn_dropout=self.attn_dropout_prob,
+            base_attn_unit_dims=[16, 24, 32],
+            n_attn_blocks=[1, 1, 2],
+            aspp_out_channels=32,
         )

     def create_and_check_model(self, config, pixel_values, labels, pixel_labels):
@@ -225,10 +228,6 @@ class MobileViTV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()

@@ -2708,7 +2708,7 @@ class ModelTesterMixin:
     def test_model_is_small(self):
        # Just a consistency check to make sure we are not running tests on 80M parameter models.
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
-        # print(config)
+        print(config)

         for model_class in self.all_model_classes:
             model = model_class(config)
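The hunk above cuts off after model = model_class(config). A hedged completion of what this check plausibly does next, assuming it simply counts parameters and compares them against a small budget; the exact threshold is not visible in this diff and is an assumption (only the "80M parameter models" comment is shown):

# Assumed continuation of test_model_is_small; the real threshold is not shown in this diff.
def test_model_is_small(self):
    # Just a consistency check to make sure we are not running tests on 80M parameter models.
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)
        num_params = sum(p.numel() for p in model.parameters())
        # Assumed budget, chosen to sit well below the 80M mentioned in the comment above.
        assert num_params < 1_000_000, f"{model_class.__name__} has {num_params} parameters, too big for common tests"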