diff --git a/tests/models/ctrl/test_modeling_ctrl.py b/tests/models/ctrl/test_modeling_ctrl.py
index fd202772391..dfcb2c91338 100644
--- a/tests/models/ctrl/test_modeling_ctrl.py
+++ b/tests/models/ctrl/test_modeling_ctrl.py
@@ -133,7 +133,7 @@ class CTRLModelTester:
             n_embd=self.hidden_size,
             n_layer=self.num_hidden_layers,
             n_head=self.num_attention_heads,
-            # intermediate_size=self.intermediate_size,
+            dff=self.intermediate_size,
             # hidden_act=self.hidden_act,
             # hidden_dropout_prob=self.hidden_dropout_prob,
             # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
@@ -243,10 +243,6 @@ class CTRLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_lm_head_model(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
diff --git a/tests/models/ctrl/test_modeling_tf_ctrl.py b/tests/models/ctrl/test_modeling_tf_ctrl.py
index 9a5ebbe34a7..01e57bcca37 100644
--- a/tests/models/ctrl/test_modeling_tf_ctrl.py
+++ b/tests/models/ctrl/test_modeling_tf_ctrl.py
@@ -95,7 +95,7 @@ class TFCTRLModelTester(object):
             n_embd=self.hidden_size,
             n_layer=self.num_hidden_layers,
             n_head=self.num_attention_heads,
-            # intermediate_size=self.intermediate_size,
+            dff=self.intermediate_size,
             # hidden_act=self.hidden_act,
             # hidden_dropout_prob=self.hidden_dropout_prob,
             # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
diff --git a/tests/models/cvt/test_modeling_cvt.py b/tests/models/cvt/test_modeling_cvt.py
index ab37e47b513..6f4f63f0f9d 100644
--- a/tests/models/cvt/test_modeling_cvt.py
+++ b/tests/models/cvt/test_modeling_cvt.py
@@ -55,8 +55,8 @@ class CvtModelTester:
         batch_size=13,
         image_size=64,
         num_channels=3,
-        embed_dim=[16, 48, 96],
-        num_heads=[1, 3, 6],
+        embed_dim=[16, 32, 48],
+        num_heads=[1, 2, 3],
         depth=[1, 2, 10],
         patch_sizes=[7, 3, 3],
         patch_stride=[4, 2, 2],
@@ -247,10 +247,6 @@ class CvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in CVT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
diff --git a/tests/models/cvt/test_modeling_tf_cvt.py b/tests/models/cvt/test_modeling_tf_cvt.py
index d1d5835d7bc..ecb672d422a 100644
--- a/tests/models/cvt/test_modeling_tf_cvt.py
+++ b/tests/models/cvt/test_modeling_tf_cvt.py
@@ -45,8 +45,8 @@ class TFCvtModelTester:
         batch_size=13,
         image_size=64,
         num_channels=3,
-        embed_dim=[16, 48, 96],
-        num_heads=[1, 3, 6],
+        embed_dim=[16, 32, 48],
+        num_heads=[1, 2, 3],
         depth=[1, 2, 10],
         patch_sizes=[7, 3, 3],
         patch_stride=[4, 2, 2],
diff --git a/tests/models/deta/test_modeling_deta.py b/tests/models/deta/test_modeling_deta.py
index 0693b030e29..d5bf32acaba 100644
--- a/tests/models/deta/test_modeling_deta.py
+++ b/tests/models/deta/test_modeling_deta.py
@@ -19,7 +19,7 @@
 import inspect
 import math
 import unittest

-from transformers import DetaConfig, is_torch_available, is_torchvision_available, is_vision_available
+from transformers import DetaConfig, ResNetConfig, is_torch_available, is_torchvision_available, is_vision_available
 from transformers.file_utils import cached_property
 from transformers.testing_utils import require_torchvision, require_vision, slow, torch_device
@@ -49,7 +49,7 @@ class DetaModelTester:
         batch_size=8,
         is_training=True,
         use_labels=True,
-        hidden_size=256,
+        hidden_size=32,
         num_hidden_layers=2,
         num_attention_heads=8,
         intermediate_size=4,
@@ -118,6 +118,16 @@ class DetaModelTester:
         return config, pixel_values, pixel_mask, labels

     def get_config(self):
+        resnet_config = ResNetConfig(
+            num_channels=3,
+            embeddings_size=10,
+            hidden_sizes=[10, 20, 30, 40],
+            depths=[1, 1, 2, 1],
+            hidden_act="relu",
+            num_labels=3,
+            out_features=["stage2", "stage3", "stage4"],
+            out_indices=[2, 3, 4],
+        )
         return DetaConfig(
             d_model=self.hidden_size,
             encoder_layers=self.num_hidden_layers,
@@ -134,6 +144,7 @@ class DetaModelTester:
             encoder_n_points=self.encoder_n_points,
             decoder_n_points=self.decoder_n_points,
             two_stage=self.two_stage,
+            backbone_config=resnet_config,
         )

     def prepare_config_and_inputs_for_common(self):
@@ -423,10 +434,6 @@ class DetaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
     def test_tied_model_weights_key_ignore(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_initialization(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/models/dpt/test_modeling_dpt.py b/tests/models/dpt/test_modeling_dpt.py
index f9cb66607d0..62ac20df313 100644
--- a/tests/models/dpt/test_modeling_dpt.py
+++ b/tests/models/dpt/test_modeling_dpt.py
@@ -62,6 +62,7 @@ class DPTModelTester:
         attention_probs_dropout_prob=0.1,
         initializer_range=0.02,
         num_labels=3,
+        neck_hidden_sizes=[16, 16, 32, 32],
         is_hybrid=False,
         scope=None,
     ):
@@ -84,6 +85,7 @@ class DPTModelTester:
         self.num_labels = num_labels
         self.scope = scope
         self.is_hybrid = is_hybrid
+        self.neck_hidden_sizes = neck_hidden_sizes
         # sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
         self.seq_length = num_patches + 1
@@ -105,6 +107,7 @@ class DPTModelTester:
             patch_size=self.patch_size,
             num_channels=self.num_channels,
             hidden_size=self.hidden_size,
+            fusion_hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
             backbone_out_indices=self.backbone_out_indices,
             num_attention_heads=self.num_attention_heads,
@@ -115,6 +118,7 @@ class DPTModelTester:
             is_decoder=False,
             initializer_range=self.initializer_range,
             is_hybrid=self.is_hybrid,
+            neck_hidden_sizes=self.neck_hidden_sizes,
         )

     def create_and_check_model(self, config, pixel_values, labels):
@@ -275,10 +279,6 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in DPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
diff --git a/tests/models/dpt/test_modeling_dpt_hybrid.py b/tests/models/dpt/test_modeling_dpt_hybrid.py
index 4c32c76e865..7270f609c2b 100644
--- a/tests/models/dpt/test_modeling_dpt_hybrid.py
+++ b/tests/models/dpt/test_modeling_dpt_hybrid.py
@@ -62,7 +62,8 @@ class DPTModelTester:
         attention_probs_dropout_prob=0.1,
         initializer_range=0.02,
         num_labels=3,
-        backbone_featmap_shape=[1, 384, 24, 24],
+        backbone_featmap_shape=[1, 32, 24, 24],
+        neck_hidden_sizes=[16, 16, 32, 32],
         is_hybrid=True,
         scope=None,
     ):
@@ -86,6 +87,7 @@
         self.backbone_featmap_shape = backbone_featmap_shape
         self.scope = scope
         self.is_hybrid = is_hybrid
+        self.neck_hidden_sizes = neck_hidden_sizes
         # sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
         self.seq_length = num_patches + 1
@@ -108,7 +110,7 @@
             "depths": [3, 4, 9],
             "out_features": ["stage1", "stage2", "stage3"],
             "embedding_dynamic_padding": True,
-            "hidden_sizes": [96, 192, 384, 768],
+            "hidden_sizes": [16, 16, 32, 32],
             "num_groups": 2,
         }

@@ -117,6 +119,7 @@
             patch_size=self.patch_size,
             num_channels=self.num_channels,
             hidden_size=self.hidden_size,
+            fusion_hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
             backbone_out_indices=self.backbone_out_indices,
             num_attention_heads=self.num_attention_heads,
@@ -129,6 +132,7 @@
             is_hybrid=self.is_hybrid,
             backbone_config=backbone_config,
             backbone_featmap_shape=self.backbone_featmap_shape,
+            neck_hidden_sizes=self.neck_hidden_sizes,
         )

     def create_and_check_model(self, config, pixel_values, labels):
@@ -289,10 +293,6 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in DPT_PRETRAINED_MODEL_ARCHIVE_LIST[1:]:
diff --git a/tests/models/efficientnet/test_modeling_efficientnet.py b/tests/models/efficientnet/test_modeling_efficientnet.py
index e77c17a7a6a..52a7ec4a7dd 100644
--- a/tests/models/efficientnet/test_modeling_efficientnet.py
+++ b/tests/models/efficientnet/test_modeling_efficientnet.py
@@ -49,7 +49,7 @@ class EfficientNetModelTester:
         num_channels=3,
         kernel_sizes=[3, 3, 5],
         in_channels=[32, 16, 24],
-        out_channels=[16, 24, 40],
+        out_channels=[16, 24, 20],
         strides=[1, 1, 2],
         num_block_repeats=[1, 1, 2],
         expand_ratios=[1, 6, 6],
@@ -223,10 +223,6 @@ class EfficientNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in EFFICIENTNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py
index b8883314619..8f1b06da06c 100644
--- a/tests/models/encodec/test_modeling_encodec.py
+++ b/tests/models/encodec/test_modeling_encodec.py
@@ -77,16 +77,25 @@ class EncodecModelTester:
         batch_size=12,
         num_channels=2,
         is_training=False,
-        num_hidden_layers=4,
         intermediate_size=40,
+        hidden_size=32,
+        num_filters=8,
+        num_residual_layers=1,
+        upsampling_ratios=[8, 4],
+        num_lstm_layers=1,
+        codebook_size=64,
     ):
         self.parent = parent
         self.batch_size = batch_size
         self.num_channels = num_channels
         self.is_training = is_training
-
-        self.num_hidden_layers = num_hidden_layers
         self.intermediate_size = intermediate_size
+        self.hidden_size = hidden_size
+        self.num_filters = num_filters
+        self.num_residual_layers = num_residual_layers
+        self.upsampling_ratios = upsampling_ratios
+        self.num_lstm_layers = num_lstm_layers
+        self.codebook_size = codebook_size

     def prepare_config_and_inputs(self):
         input_values = floats_tensor([self.batch_size, self.num_channels, self.intermediate_size], scale=1.0)
@@ -99,7 +108,16 @@ class EncodecModelTester:
         return config, inputs_dict

     def get_config(self):
-        return EncodecConfig(audio_channels=self.num_channels, chunk_in_sec=None)
+        return EncodecConfig(
+            audio_channels=self.num_channels,
+            chunk_in_sec=None,
+            hidden_size=self.hidden_size,
+            num_filters=self.num_filters,
+            num_residual_layers=self.num_residual_layers,
+            upsampling_ratios=self.upsampling_ratios,
+            num_lstm_layers=self.num_lstm_layers,
+            codebook_size=self.codebook_size,
+        )

     def create_and_check_model_forward(self, config, inputs_dict):
         model = EncodecModel(config=config).to(torch_device).eval()
@@ -397,10 +415,6 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_identity_shortcut(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs()
         config.use_conv_shortcut = False
diff --git a/tests/models/esm/test_modeling_esm.py b/tests/models/esm/test_modeling_esm.py
index fc1879e6bf4..2e5d48082be 100644
--- a/tests/models/esm/test_modeling_esm.py
+++ b/tests/models/esm/test_modeling_esm.py
@@ -279,10 +279,6 @@ class EsmModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_resize_tokens_embeddings(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 class EsmModelIntegrationTest(TestCasePlus):
diff --git a/tests/models/esm/test_modeling_esmfold.py b/tests/models/esm/test_modeling_esmfold.py
index bc5c10ae242..39f274af54d 100644
--- a/tests/models/esm/test_modeling_esmfold.py
+++ b/tests/models/esm/test_modeling_esmfold.py
@@ -100,6 +100,28 @@ class EsmFoldModelTester:
         return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

     def get_config(self):
+        esmfold_config = {
+            "trunk": {
+                "num_blocks": 2,
+                "sequence_state_dim": 64,
+                "pairwise_state_dim": 16,
+                "sequence_head_width": 4,
+                "pairwise_head_width": 4,
+                "position_bins": 4,
+                "chunk_size": 16,
+                "structure_module": {
+                    "ipa_dim": 16,
+                    "num_angles": 7,
+                    "num_blocks": 2,
+                    "num_heads_ipa": 4,
+                    "pairwise_dim": 16,
+                    "resnet_dim": 16,
+                    "sequence_dim": 48,
+                },
+            },
+            "fp16_esm": False,
+            "lddt_head_hid_dim": 16,
+        }
         config = EsmConfig(
             vocab_size=33,
             hidden_size=self.hidden_size,
@@ -114,7 +136,7 @@ class EsmFoldModelTester:
             type_vocab_size=self.type_vocab_size,
             initializer_range=self.initializer_range,
             is_folding_model=True,
-            esmfold_config={"trunk": {"num_blocks": 2}, "fp16_esm": False},
+            esmfold_config=esmfold_config,
         )
         return config

@@ -126,8 +148,8 @@
         result = model(input_ids)
         result = model(input_ids)

-        self.parent.assertEqual(result.positions.shape, (8, self.batch_size, self.seq_length, 14, 3))
-        self.parent.assertEqual(result.angles.shape, (8, self.batch_size, self.seq_length, 7, 2))
+        self.parent.assertEqual(result.positions.shape, (2, self.batch_size, self.seq_length, 14, 3))
+        self.parent.assertEqual(result.angles.shape, (2, self.batch_size, self.seq_length, 7, 2))

     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
@@ -243,10 +265,6 @@ class EsmFoldModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 class EsmModelIntegrationTest(TestCasePlus):
diff --git a/tests/models/flava/test_modeling_flava.py b/tests/models/flava/test_modeling_flava.py
index cef39224da3..f1221f1061c 100644
--- a/tests/models/flava/test_modeling_flava.py
+++ b/tests/models/flava/test_modeling_flava.py
@@ -92,7 +92,7 @@ class FlavaImageModelTester:
         num_channels=3,
         qkv_bias=True,
         mask_token=True,
-        vocab_size=8192,
+        vocab_size=99,
     ):
         self.parent = parent
         self.batch_size = batch_size
@@ -321,10 +321,6 @@ class FlavaImageModelTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -341,7 +337,7 @@ class FlavaTextModelTester:
         is_training=True,
         use_input_mask=True,
         use_token_type_ids=True,
-        vocab_size=30522,
+        vocab_size=102,
         type_vocab_size=2,
         max_position_embeddings=512,
         position_embedding_type="absolute",
@@ -476,10 +472,6 @@ class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -632,10 +624,6 @@ class FlavaMultimodalModelTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -644,11 +632,23 @@


 class FlavaImageCodebookTester:
-    def __init__(self, parent, batch_size=12, image_size=112, num_channels=3):
+    def __init__(
+        self,
+        parent,
+        batch_size=12,
+        image_size=112,
+        num_channels=3,
+        hidden_size=32,
+        num_groups=2,
+        vocab_size=99,
+    ):
         self.parent = parent
         self.batch_size = batch_size
         self.image_size = image_size
         self.num_channels = num_channels
+        self.hidden_size = hidden_size
+        self.num_groups = num_groups
+        self.vocab_size = vocab_size

     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -657,7 +657,9 @@ class FlavaImageCodebookTester:
         return config, pixel_values

     def get_config(self):
-        return FlavaImageCodebookConfig()
+        return FlavaImageCodebookConfig(
+            hidden_size=self.hidden_size, num_groups=self.num_groups, vocab_size=self.vocab_size
+        )

     def create_and_check_model(self, config, pixel_values):
         model = FlavaImageCodebook(config=config)
@@ -743,10 +745,6 @@ class FlavaImageCodebookTest(ModelTesterMixin, unittest.TestCase):
     def test_save_load_fast_init_to_base(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in FLAVA_CODEBOOK_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -929,10 +927,6 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             return
diff --git a/tests/models/git/test_modeling_git.py b/tests/models/git/test_modeling_git.py
index 5997230b16e..ed094db4a05 100644
--- a/tests/models/git/test_modeling_git.py
+++ b/tests/models/git/test_modeling_git.py
@@ -203,7 +203,7 @@ class GitModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=4,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -268,6 +268,10 @@ class GitModelTester:
                 "num_channels": self.num_channels,
                 "image_size": self.image_size,
                 "patch_size": self.patch_size,
+                "hidden_size": self.hidden_size,
+                "projection_dim": 32,
+                "num_hidden_layers": self.num_hidden_layers,
+                "num_attention_heads": self.num_attention_heads,
             },
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
@@ -454,10 +458,6 @@ class GitModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
     def test_greedy_generate_dict_outputs_use_cache(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 @require_vision
diff --git a/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py b/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py
index 0738b294c03..54a98cf70fd 100644
--- a/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py
+++ b/tests/models/gptsan_japanese/test_modeling_gptsan_japanese.py
@@ -38,7 +38,7 @@ class GPTSanJapaneseTester:
     def __init__(
         self,
         parent,
-        vocab_size=36000,
+        vocab_size=99,
         batch_size=13,
         num_contexts=7,
         # For common tests
@@ -182,10 +182,6 @@ class GPTSanJapaneseTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
     def test_model_parallelism(self):
         super().test_model_parallelism()

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-

 @require_torch
 class GPTSanJapaneseForConditionalGenerationTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
@@ -216,10 +212,6 @@ class GPTSanJapaneseForConditionalGenerationTest(ModelTesterMixin, GenerationTes
     def test_model_parallelism(self):
         super().test_model_parallelism()

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_logits(self):
         model = GPTSanJapaneseForConditionalGeneration.from_pretrained("Tanrei/GPTSAN-japanese")
diff --git a/tests/models/graphormer/test_modeling_graphormer.py b/tests/models/graphormer/test_modeling_graphormer.py
index 60f188f3b2b..b6a994f4597 100644
--- a/tests/models/graphormer/test_modeling_graphormer.py
+++ b/tests/models/graphormer/test_modeling_graphormer.py
@@ -42,22 +42,22 @@ class GraphormerModelTester:
     def __init__(
         self,
         parent,
         num_classes=1,
-        num_atoms=512 * 9,
-        num_edges=512 * 3,
-        num_in_degree=512,
-        num_out_degree=512,
-        num_spatial=512,
-        num_edge_dis=128,
+        num_atoms=32 * 9,
+        num_edges=32 * 3,
+        num_in_degree=32,
+        num_out_degree=32,
+        num_spatial=32,
+        num_edge_dis=16,
         multi_hop_max_dist=5,  # sometimes is 20
-        spatial_pos_max=1024,
+        spatial_pos_max=32,
         edge_type="multi_hop",
         init_fn=None,
-        max_nodes=512,
+        max_nodes=32,
         share_input_output_embed=False,
-        num_hidden_layers=12,
-        embedding_dim=768,
-        ffn_embedding_dim=768,
-        num_attention_heads=32,
+        num_hidden_layers=2,
+        embedding_dim=32,
+        ffn_embedding_dim=32,
+        num_attention_heads=4,
         dropout=0.1,
         attention_dropout=0.1,
         activation_dropout=0.1,
@@ -470,10 +470,6 @@ class GraphormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_graph_classification(*config_and_inputs)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in GRAPHORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
diff --git a/tests/models/levit/test_modeling_levit.py b/tests/models/levit/test_modeling_levit.py
index b78554374e3..0e46f6f56dd 100644
--- a/tests/models/levit/test_modeling_levit.py
+++ b/tests/models/levit/test_modeling_levit.py
@@ -67,10 +67,10 @@ class LevitModelTester:
         stride=2,
         padding=1,
         patch_size=16,
-        hidden_sizes=[128, 256, 384],
-        num_attention_heads=[4, 6, 8],
+        hidden_sizes=[16, 32, 48],
+        num_attention_heads=[1, 2, 3],
         depths=[2, 3, 4],
-        key_dim=[16, 16, 16],
+        key_dim=[8, 8, 8],
         drop_path_rate=0,
         mlp_ratio=[2, 2, 2],
         attention_ratio=[2, 2, 2],
@@ -282,10 +282,6 @@ class LevitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
             check_hidden_states_output(inputs_dict, config, model_class)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
diff --git a/tests/models/mask2former/test_modeling_mask2former.py b/tests/models/mask2former/test_modeling_mask2former.py
index c492bbb7664..898f2199922 100644
--- a/tests/models/mask2former/test_modeling_mask2former.py
+++ b/tests/models/mask2former/test_modeling_mask2former.py
@@ -54,6 +54,8 @@ class Mask2FormerModelTester:
         max_size=32 * 8,
         num_labels=4,
         hidden_dim=64,
+        num_attention_heads=4,
+        num_hidden_layers=2,
     ):
         self.parent = parent
         self.batch_size = batch_size
@@ -66,6 +68,8 @@
         self.num_labels = num_labels
         self.hidden_dim = hidden_dim
         self.mask_feature_size = hidden_dim
+        self.num_attention_heads = num_attention_heads
+        self.num_hidden_layers = num_hidden_layers

     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.min_size, self.max_size]).to(
@@ -85,15 +89,25 @@
     def get_config(self):
         config = Mask2FormerConfig(
             hidden_size=self.hidden_dim,
+            num_attention_heads=self.num_attention_heads,
+            num_hidden_layers=self.num_hidden_layers,
+            encoder_feedforward_dim=16,
+            dim_feedforward=32,
+            num_queries=self.num_queries,
+            num_labels=self.num_labels,
+            decoder_layers=2,
+            encoder_layers=2,
+            feature_size=16,
         )
         config.num_queries = self.num_queries
         config.num_labels = self.num_labels

+        config.backbone_config.embed_dim = 16
         config.backbone_config.depths = [1, 1, 1, 1]
+        config.backbone_config.hidden_size = 16
         config.backbone_config.num_channels = self.num_channels
+        config.backbone_config.num_heads = [1, 1, 2, 2]

-        config.encoder_feedforward_dim = 64
-        config.dim_feedforward = 128
         config.hidden_dim = self.hidden_dim
         config.mask_feature_size = self.hidden_dim
         config.feature_size = self.hidden_dim
@@ -220,10 +234,6 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py
index 69cf21d566e..c69a0c94ced 100644
--- a/tests/models/maskformer/test_modeling_maskformer.py
+++ b/tests/models/maskformer/test_modeling_maskformer.py
@@ -85,9 +85,15 @@ class MaskFormerModelTester:
         return MaskFormerConfig.from_backbone_and_decoder_configs(
             backbone_config=SwinConfig(
                 depths=[1, 1, 1, 1],
+                embed_dim=16,
+                hidden_size=32,
+                num_heads=[1, 1, 2, 2],
             ),
             decoder_config=DetrConfig(
-                decoder_ffn_dim=128,
+                decoder_ffn_dim=64,
+                decoder_layers=2,
+                encoder_ffn_dim=64,
+                encoder_layers=2,
                 num_queries=self.num_queries,
                 decoder_attention_heads=2,
                 d_model=self.mask_feature_size,
@@ -224,10 +230,6 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/models/mobilevit/test_modeling_mobilevit.py b/tests/models/mobilevit/test_modeling_mobilevit.py
index 350934ad051..2c01ea0c99b 100644
--- a/tests/models/mobilevit/test_modeling_mobilevit.py
+++ b/tests/models/mobilevit/test_modeling_mobilevit.py
@@ -56,7 +56,7 @@ class MobileViTModelTester:
         image_size=32,
         patch_size=2,
         num_channels=3,
-        last_hidden_size=640,
+        last_hidden_size=32,
         num_attention_heads=4,
         hidden_act="silu",
         conv_kernel_size=3,
@@ -115,6 +115,8 @@
             attention_probs_dropout_prob=self.attention_probs_dropout_prob,
             classifier_dropout_prob=self.classifier_dropout_prob,
             initializer_range=self.initializer_range,
+            hidden_sizes=[12, 16, 20],
+            neck_hidden_sizes=[8, 8, 16, 16, 32, 32, 32],
         )

     def create_and_check_model(self, config, pixel_values, labels, pixel_labels):
@@ -231,10 +233,6 @@
         expected_arg_names = ["pixel_values"]
         self.assertListEqual(arg_names[:1], expected_arg_names)

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_model(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py
index 4a3c4484d28..289d739774a 100644
--- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py
+++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py
@@ -59,7 +59,7 @@ class TFMobileViTModelTester:
         image_size=32,
         patch_size=2,
         num_channels=3,
-        last_hidden_size=640,
+        last_hidden_size=32,
         num_attention_heads=4,
         hidden_act="silu",
         conv_kernel_size=3,
@@ -118,6 +118,8 @@
             attention_probs_dropout_prob=self.attention_probs_dropout_prob,
             classifier_dropout_prob=self.classifier_dropout_prob,
             initializer_range=self.initializer_range,
+            hidden_sizes=[12, 16, 20],
+            neck_hidden_sizes=[8, 8, 16, 16, 32, 32, 32],
         )

     def create_and_check_model(self, config, pixel_values, labels, pixel_labels):
diff --git a/tests/models/mobilevitv2/test_modeling_mobilevitv2.py b/tests/models/mobilevitv2/test_modeling_mobilevitv2.py
index 7f5c332a616..b1961b2e6d4 100644
--- a/tests/models/mobilevitv2/test_modeling_mobilevitv2.py
+++ b/tests/models/mobilevitv2/test_modeling_mobilevitv2.py
@@ -115,6 +115,9 @@ class MobileViTV2ModelTester:
             width_multiplier=self.width_multiplier,
             ffn_dropout=self.ffn_dropout_prob,
             attn_dropout=self.attn_dropout_prob,
+            base_attn_unit_dims=[16, 24, 32],
+            n_attn_blocks=[1, 1, 2],
+            aspp_out_channels=32,
         )

     def create_and_check_model(self, config, pixel_values, labels, pixel_labels):
@@ -225,10 +228,6 @@ class MobileViTV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
     def test_multi_gpu_data_parallel_forward(self):
         pass

-    @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
-    def test_model_is_small(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 0d5080ec5aa..a394723e53c 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -2708,7 +2708,7 @@ class ModelTesterMixin:
     def test_model_is_small(self):
         # Just a consistency check to make sure we are not running tests on 80M parameter models.
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
-        # print(config)
+        print(config)

         for model_class in self.all_model_classes:
             model = model_class(config)
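
Note: every `test_model_is_small` skip deleted above exists to satisfy the common-test consistency check in `ModelTesterMixin` (last hunk), whose assertion body is truncated out of this diff. The sketch below shows the idea, assuming a budget of roughly one million parameters per test model; the exact threshold and error message are an assumption here, not something shown in the diff:

    from transformers import CvtConfig, CvtModel

    # Tiny config mirroring the reduced CvtModelTester defaults from the CvT hunk above.
    config = CvtConfig(embed_dim=[16, 32, 48], num_heads=[1, 2, 3], depth=[1, 2, 10])
    model = CvtModel(config)

    # PreTrainedModel.num_parameters() counts the model's parameters; the assumed 1M
    # budget stands in for the assertion inside ModelTesterMixin.test_model_is_small.
    num_params = model.num_parameters()
    assert num_params < 1_000_000, f"CvtModel is too big for the common tests ({num_params})"

The old defaults (embed_dim=[16, 48, 96], num_heads=[1, 3, 6]) are the kind of sizes that can blow past such a budget, which is presumably why each tester shrinks its dimensions rather than keeping the per-model @unittest.skip escape hatch.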