Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-04 13:20:12 +06:00)

Skip tests properly (#31308)

* Skip tests properly
* [test_all]
* Add 'reason' as kwarg for skipTest
* [test_all] Fix up
* [test_all]

This commit is contained in:
parent 1f9f57ab4c
commit 1de7dc7403
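The pattern applied throughout the diff below: decorator-level skips pass an explicit `reason=` keyword to `unittest.skip`, and bare `return` statements inside test bodies become `self.skipTest(reason=...)`, so the tests show up as skipped instead of silently passing. A minimal sketch of the two idioms (illustration only; `test_feature_x` and `test_feature_y` are example names, not tests taken from this commit):

```python
import unittest


class ExampleTests(unittest.TestCase):
    # Whole-test skip: the reason is now passed as an explicit keyword argument.
    @unittest.skip(reason="this bug needs to be fixed")
    def test_feature_x(self):
        pass

    def test_feature_y(self):
        some_precondition = False  # stand-in for checks like `self.test_rust_tokenizer`
        if not some_precondition:
            # Previously a bare `return` made the test count as passed;
            # skipTest(reason=...) marks it as skipped in the report.
            self.skipTest(reason="precondition is not met")
        self.assertTrue(some_precondition)
```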
@@ -862,7 +862,7 @@ Code, der fehlerhaft ist, einen schlechten Zustand verursacht, der sich auf ande
 - Hier sehen Sie, wie Sie einen ganzen Test bedingungslos überspringen können:
 
 ```python no-style
-@unittest.skip("this bug needs to be fixed")
+@unittest.skip(reason="this bug needs to be fixed")
 def test_feature_x():
 ```
 
@@ -881,7 +881,7 @@ code that's buggy causes some bad state that will affect other tests, do not use
 - Here is how to skip whole test unconditionally:
 
 ```python no-style
-@unittest.skip("this bug needs to be fixed")
+@unittest.skip(reason="this bug needs to be fixed")
 def test_feature_x():
 ```
 
@@ -809,7 +809,7 @@ with ExtendSysPath(f"{bindir}/.."):
 
 
 ```python no-style
-@unittest.skip("this bug needs to be fixed")
+@unittest.skip(reason="this bug needs to be fixed")
 def test_feature_x():
 ```
 
@@ -1211,4 +1211,3 @@ cmd_that_may_fail || true
 
 - [Github Actions:](https://github.com/actions/toolkit/issues/399)
 - [CircleCI:](https://ideas.circleci.com/ideas/CCI-I-344)
-
@@ -847,7 +847,7 @@ with ExtendSysPath(f"{bindir}/.."):
 - 전체 테스트를 무조건 건너뛰려면 다음과 같이 할 수 있습니다:
 
 ```python no-style
-@unittest.skip("this bug needs to be fixed")
+@unittest.skip(reason="this bug needs to be fixed")
 def test_feature_x():
 ```
 
@@ -226,7 +226,7 @@ def is_pt_tf_cross_test(test_case):
 
     """
     if not _run_pt_tf_cross_tests or not is_torch_available() or not is_tf_available():
-        return unittest.skip("test is PT+TF test")(test_case)
+        return unittest.skip(reason="test is PT+TF test")(test_case)
     else:
         try:
             import pytest  # We don't need a hard dependency on pytest in the main library
@@ -245,7 +245,7 @@ def is_pt_flax_cross_test(test_case):
 
     """
     if not _run_pt_flax_cross_tests or not is_torch_available() or not is_flax_available():
-        return unittest.skip("test is PT+FLAX test")(test_case)
+        return unittest.skip(reason="test is PT+FLAX test")(test_case)
     else:
         try:
             import pytest  # We don't need a hard dependency on pytest in the main library
@@ -262,7 +262,7 @@ def is_staging_test(test_case):
     Those tests will run using the staging environment of huggingface.co instead of the real model hub.
     """
     if not _run_staging:
-        return unittest.skip("test is staging test")(test_case)
+        return unittest.skip(reason="test is staging test")(test_case)
     else:
         try:
             import pytest  # We don't need a hard dependency on pytest in the main library
@@ -278,7 +278,7 @@ def is_pipeline_test(test_case):
     skipped.
     """
     if not _run_pipeline_tests:
-        return unittest.skip("test is pipeline test")(test_case)
+        return unittest.skip(reason="test is pipeline test")(test_case)
     else:
         try:
             import pytest  # We don't need a hard dependency on pytest in the main library
@@ -293,7 +293,7 @@ def is_agent_test(test_case):
     Decorator marking a test as an agent test. If RUN_TOOL_TESTS is set to a falsy value, those tests will be skipped.
     """
     if not _run_agent_tests:
-        return unittest.skip("test is an agent test")(test_case)
+        return unittest.skip(reason="test is an agent test")(test_case)
     else:
         try:
             import pytest  # We don't need a hard dependency on pytest in the main library
@@ -321,7 +321,7 @@ def tooslow(test_case):
     these will not be tested by the CI.
 
     """
-    return unittest.skip("test is too slow")(test_case)
+    return unittest.skip(reason="test is too slow")(test_case)
 
 
 def custom_tokenizers(test_case):
@@ -709,7 +709,7 @@ def require_torch_multi_gpu(test_case):
     To run *only* the multi_gpu tests, assuming all test names contain multi_gpu: $ pytest -sv ./tests -k "multi_gpu"
     """
     if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
+        return unittest.skip(reason="test requires PyTorch")(test_case)
 
     import torch
 
@@ -723,7 +723,7 @@ def require_torch_multi_accelerator(test_case):
     multi_accelerator: $ pytest -sv ./tests -k "multi_accelerator"
     """
     if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
+        return unittest.skip(reason="test requires PyTorch")(test_case)
 
     return unittest.skipUnless(backend_device_count(torch_device) > 1, "test requires multiple accelerators")(
         test_case
@@ -735,7 +735,7 @@ def require_torch_non_multi_gpu(test_case):
     Decorator marking a test that requires 0 or 1 GPU setup (in PyTorch).
     """
     if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
+        return unittest.skip(reason="test requires PyTorch")(test_case)
 
     import torch
 
@@ -747,7 +747,7 @@ def require_torch_non_multi_accelerator(test_case):
     Decorator marking a test that requires 0 or 1 accelerator setup (in PyTorch).
     """
     if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
+        return unittest.skip(reason="test requires PyTorch")(test_case)
 
     return unittest.skipUnless(backend_device_count(torch_device) < 2, "test requires 0 or 1 accelerator")(test_case)
 
@@ -757,7 +757,7 @@ def require_torch_up_to_2_gpus(test_case):
     Decorator marking a test that requires 0 or 1 or 2 GPU setup (in PyTorch).
     """
     if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
+        return unittest.skip(reason="test requires PyTorch")(test_case)
 
     import torch
 
@@ -769,7 +769,7 @@ def require_torch_up_to_2_accelerators(test_case):
     Decorator marking a test that requires 0 or 1 or 2 accelerator setup (in PyTorch).
     """
     if not is_torch_available():
-        return unittest.skip("test requires PyTorch")(test_case)
+        return unittest.skip(reason="test requires PyTorch")(test_case)
 
     return unittest.skipUnless(backend_device_count(torch_device) < 3, "test requires 0 or 1 or 2 accelerators")
     (test_case)
@@ -806,7 +806,7 @@ def require_torch_multi_npu(test_case):
     To run *only* the multi_npu tests, assuming all test names contain multi_npu: $ pytest -sv ./tests -k "multi_npu"
     """
     if not is_torch_npu_available():
-        return unittest.skip("test requires PyTorch NPU")(test_case)
+        return unittest.skip(reason="test requires PyTorch NPU")(test_case)
 
     return unittest.skipUnless(torch.npu.device_count() > 1, "test requires multiple NPUs")(test_case)
 
@@ -830,7 +830,7 @@ def require_torch_multi_xpu(test_case):
     To run *only* the multi_xpu tests, assuming all test names contain multi_xpu: $ pytest -sv ./tests -k "multi_xpu"
     """
    if not is_torch_xpu_available():
-        return unittest.skip("test requires PyTorch XPU")(test_case)
+        return unittest.skip(reason="test requires PyTorch XPU")(test_case)
 
     return unittest.skipUnless(torch.xpu.device_count() > 1, "test requires multiple XPUs")(test_case)
 
@@ -1078,7 +1078,7 @@ def require_bitsandbytes(test_case):
         except ImportError:
             return test_case
     else:
-        return unittest.skip("test requires bitsandbytes and torch")(test_case)
+        return unittest.skip(reason="test requires bitsandbytes and torch")(test_case)
 
 
 def require_optimum(test_case):
@@ -108,13 +108,13 @@ def require_deepspeed_aio(test_case):
     Decorator marking a test that requires deepspeed aio (nvme)
     """
    if not is_deepspeed_available():
-        return unittest.skip("test requires deepspeed")(test_case)
+        return unittest.skip(reason="test requires deepspeed")(test_case)
 
     import deepspeed
     from deepspeed.ops.aio import AsyncIOBuilder
 
     if not deepspeed.ops.__compatible_ops__[AsyncIOBuilder.NAME]:
-        return unittest.skip("test requires deepspeed async-io")(test_case)
+        return unittest.skip(reason="test requires deepspeed async-io")(test_case)
     else:
         return test_case
 
@@ -643,7 +643,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
         # print(trainer.model.b.item())
         # need to investigate at some point
         if (stage == ZERO3 and dtype == FP16) or (dtype == BF16):
-            return
+            self.skipTest(reason="When using zero3/fp16 or any/bf16 the optimizer seems run oddly")
 
         # it's enough that train didn't fail for this test, but we must check that
         # optimizer/scheduler didn't run (since if it did this test isn't testing the right thing)
@@ -795,7 +795,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
         # ToDo: Currently, hf_optim + hf_scheduler resumes with the correct states and
         # also has same losses for few steps but then slowly diverges. Need to figure it out.
         if optim == HF_OPTIM and scheduler == HF_SCHEDULER:
-            return
+            self.skipTest(reason="hf_optim + hf_scheduler resumes with the correct states but slowly diverges")
 
         output_dir = self.get_auto_remove_tmp_dir("./xxx", after=False)
         ds_config_dict = self.get_config_dict(stage)
@@ -1113,7 +1113,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
     @require_torch_multi_accelerator
     def test_inference(self, dtype):
         if dtype == "bf16" and not is_torch_bf16_available_on_device(torch_device):
-            self.skipTest("test requires bfloat16 hardware support")
+            self.skipTest(reason="test requires bfloat16 hardware support")
 
         # this is just inference, so no optimizer should be loaded
         # it only works for z3 (makes no sense with z1-z2)
@@ -80,7 +80,7 @@ class TestTrainerExt(TestCasePlus):
         logs = TrainerState.load_from_json(os.path.join(output_dir, "trainer_state.json")).log_history
 
         if not do_eval:
-            return
+            self.skipTest(reason="do_eval is False")
 
         eval_metrics = [log for log in logs if "eval_loss" in log.keys()]
 
@@ -463,9 +463,9 @@ class GenerationTesterMixin:
             config, input_ids, attention_mask = self._get_input_ids_and_config()
 
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
             if any(model_name in model_class.__name__.lower() for model_name in ["rwkv"]):
-                self.skipTest("Won't fix: model with non-standard dictionary output shapes")
+                self.skipTest(reason="Won't fix: model with non-standard dictionary output shapes")
 
             config.use_cache = True
             config.is_decoder = True
@@ -625,9 +625,9 @@ class GenerationTesterMixin:
             config, input_ids, attention_mask = self._get_input_ids_and_config()
 
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
             if any(model_name in model_class.__name__.lower() for model_name in ["rwkv"]):
-                self.skipTest("Won't fix: model with non-standard dictionary output shapes")
+                self.skipTest(reason="Won't fix: model with non-standard dictionary output shapes")
 
             model = model_class(config).to(torch_device).eval()
             logits_process_kwargs, _ = self._get_logits_processor_and_warper_kwargs(
@@ -667,7 +667,7 @@ class GenerationTesterMixin:
     def test_model_parallel_beam_search(self):
         for model_class in self.all_generative_model_classes:
             if "xpu" in torch_device:
-                return unittest.skip("device_map='auto' does not work with XPU devices")
+                return unittest.skip(reason="device_map='auto' does not work with XPU devices")
 
             if model_class._no_split_modules is None:
                 continue
@@ -765,7 +765,7 @@ class GenerationTesterMixin:
 
             # if no bos token id => cannot generate from None
             if config.bos_token_id is None:
-                return
+                self.skipTest(reason="bos_token_id is None")
 
             # hack in case they are equal, otherwise the attn mask will be [0]
             if config.bos_token_id == config.pad_token_id:
@@ -982,17 +982,17 @@ class GenerationTesterMixin:
     def test_contrastive_generate(self):
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("Stateful models don't support contrastive search generation")
+                self.skipTest(reason="Stateful models don't support contrastive search generation")
 
             # won't fix: FSMT and Reformer have a different cache variable type (and format).
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
 
             config, input_ids, attention_mask = self._get_input_ids_and_config()
 
             # NOTE: contrastive search only works with cache on at the moment.
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
             config.use_cache = True
             config.is_decoder = True
 
@@ -1009,17 +1009,17 @@ class GenerationTesterMixin:
     def test_contrastive_generate_dict_outputs_use_cache(self):
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("Stateful models don't support contrastive search generation")
+                self.skipTest(reason="Stateful models don't support contrastive search generation")
 
             # won't fix: FSMT and Reformer have a different cache variable type (and format).
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
 
             config, input_ids, attention_mask = self._get_input_ids_and_config()
 
             # NOTE: contrastive search only works with cache on at the moment.
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
             config.use_cache = True
             config.is_decoder = True
 
@@ -1045,18 +1045,18 @@ class GenerationTesterMixin:
         # Check that choosing 'low_memory' does not change the model output
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("Stateful models don't support contrastive search generation")
+                self.skipTest(reason="Stateful models don't support contrastive search generation")
 
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer", "speech2text"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
             if any(model_name in model_class.__name__.lower() for model_name in ["gptbigcode"]):
-                self.skipTest("TODO: fix me")
+                self.skipTest(reason="TODO: fix me")
 
             config, input_ids, attention_mask = self._get_input_ids_and_config(batch_size=1)
 
             # NOTE: contrastive search only works with cache on at the moment.
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
 
             config.use_cache = True
             config.is_decoder = True
@@ -1087,9 +1087,9 @@ class GenerationTesterMixin:
         # Check that choosing 'low_memory' does not change the model output
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("May fix in the future: need custom cache handling")
+                self.skipTest(reason="May fix in the future: need custom cache handling")
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
             if any(
                 model_name in model_class.__name__.lower()
                 for model_name in [
@@ -1102,7 +1102,7 @@ class GenerationTesterMixin:
                     "jamba",
                 ]
             ):
-                self.skipTest("May fix in the future: need model-specific fixes")
+                self.skipTest(reason="May fix in the future: need model-specific fixes")
             config, input_ids, _ = self._get_input_ids_and_config(batch_size=2)
             # batch_size=1 is ok, but batch_size>1 will cause non-identical output
 
@@ -1135,9 +1135,9 @@ class GenerationTesterMixin:
 
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("Stateful models don't support assisted generation")
+                self.skipTest(reason="Stateful models don't support assisted generation")
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
             if any(
                 model_name in model_class.__name__.lower()
                 for model_name in [
@@ -1151,14 +1151,14 @@ class GenerationTesterMixin:
                     "clvp",
                 ]
             ):
-                self.skipTest("May fix in the future: need model-specific fixes")
+                self.skipTest(reason="May fix in the future: need model-specific fixes")
 
             # enable cache
             config, input_ids, attention_mask = self._get_input_ids_and_config(batch_size=1)
 
             # NOTE: assisted generation only works with cache on at the moment.
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
 
             config.use_cache = True
             config.is_decoder = True
@@ -1206,9 +1206,9 @@ class GenerationTesterMixin:
 
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("Stateful models don't support assisted generation")
+                self.skipTest(reason="Stateful models don't support assisted generation")
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
             if any(
                 model_name in model_class.__name__.lower()
                 for model_name in [
@@ -1222,14 +1222,14 @@ class GenerationTesterMixin:
                     "clvp",
                 ]
             ):
-                self.skipTest("May fix in the future: need model-specific fixes")
+                self.skipTest(reason="May fix in the future: need model-specific fixes")
 
             # enable cache
             config, input_ids, attention_mask = self._get_input_ids_and_config(batch_size=1)
 
             # NOTE: assisted generation only works with cache on at the moment.
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
 
             config.use_cache = True
             config.is_decoder = True
@@ -1268,9 +1268,9 @@ class GenerationTesterMixin:
         # different shapes, see https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535).
         for model_class in self.all_generative_model_classes:
             if model_class._is_stateful:
-                self.skipTest("Stateful models don't support assisted generation")
+                self.skipTest(reason="Stateful models don't support assisted generation")
             if any(model_name in model_class.__name__.lower() for model_name in ["fsmt", "reformer"]):
-                self.skipTest("Won't fix: old model with different cache format")
+                self.skipTest(reason="Won't fix: old model with different cache format")
             if any(
                 model_name in model_class.__name__.lower()
                 for model_name in [
@@ -1284,14 +1284,14 @@ class GenerationTesterMixin:
                     "clvp",
                 ]
             ):
-                self.skipTest("May fix in the future: need model-specific fixes")
+                self.skipTest(reason="May fix in the future: need model-specific fixes")
 
             # enable cache
             config, input_ids, attention_mask = self._get_input_ids_and_config(batch_size=1)
 
             # NOTE: assisted generation only works with cache on at the moment.
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
 
             config.use_cache = True
             config.is_decoder = True
@@ -1436,7 +1436,7 @@ class GenerationTesterMixin:
 
             # If it doesn't support cache, pass the test
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
 
             model = model_class(config).to(torch_device)
             if "use_cache" not in inputs:
@@ -1445,7 +1445,7 @@ class GenerationTesterMixin:
 
             # If "past_key_values" is not returned, pass the test (e.g. RWKV uses a different cache name and format)
            if "past_key_values" not in outputs:
-                self.skipTest("This model doesn't return `past_key_values`")
+                self.skipTest(reason="This model doesn't return `past_key_values`")
 
             num_hidden_layers = (
                 getattr(config, "decoder_layers", None)
@@ -1553,14 +1553,14 @@ class GenerationTesterMixin:
         # Tests that we can continue generating from past key values, returned from a previous `generate` call
         for model_class in self.all_generative_model_classes:
             if any(model_name in model_class.__name__.lower() for model_name in ["imagegpt"]):
-                self.skipTest("Won't fix: old model with unique inputs/caches/other")
+                self.skipTest(reason="Won't fix: old model with unique inputs/caches/other")
             if any(model_name in model_class.__name__.lower() for model_name in ["umt5"]):
-                self.skipTest("TODO: needs modeling or test input preparation fixes for compatibility")
+                self.skipTest(reason="TODO: needs modeling or test input preparation fixes for compatibility")
 
             config, inputs = self.model_tester.prepare_config_and_inputs_for_common()
 
             if not hasattr(config, "use_cache"):
-                self.skipTest("This model doesn't support caching")
+                self.skipTest(reason="This model doesn't support caching")
 
             # Let's make it always:
             # 1. use cache (for obvious reasons)
@@ -1582,7 +1582,7 @@ class GenerationTesterMixin:
             # If "past_key_values" is not returned, skip the test (e.g. RWKV uses a different cache name and format)
             outputs = model(**inputs)
             if "past_key_values" not in outputs:
-                self.skipTest("This model doesn't return `past_key_values`")
+                self.skipTest(reason="This model doesn't return `past_key_values`")
 
             # Traditional way of generating text, with `return_dict_in_generate` to return the past key values
             outputs = model.generate(**inputs, do_sample=False, max_new_tokens=4, return_dict_in_generate=True)
@@ -1632,7 +1632,7 @@ class GenerationTesterMixin:
         # 👉 tests with and without sampling so we can cover the most common use cases.
         for model_class in self.all_generative_model_classes:
             if not model_class._supports_cache_class:
-                self.skipTest("This model does not support the new cache format")
+                self.skipTest(reason="This model does not support the new cache format")
 
             config, input_ids, attention_mask = self._get_input_ids_and_config()
             config.use_cache = True
@@ -1689,7 +1689,7 @@ class GenerationTesterMixin:
     def test_generate_with_quant_cache(self):
         for model_class in self.all_generative_model_classes:
             if not model_class._supports_quantized_cache:
-                self.skipTest("This model does not support the quantized cache format")
+                self.skipTest(reason="This model does not support the quantized cache format")
 
             config, input_ids, attention_mask = self._get_input_ids_and_config()
             config.use_cache = True
|
@ -67,7 +67,7 @@ class AlbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_rust_and_python_full_tokenizers(self):
|
def test_rust_and_python_full_tokenizers(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_tokenizer()
|
tokenizer = self.get_tokenizer()
|
||||||
rust_tokenizer = self.get_rust_tokenizer()
|
rust_tokenizer = self.get_rust_tokenizer()
|
||||||
|
@ -23,7 +23,6 @@ import requests
|
|||||||
|
|
||||||
from transformers import AlignConfig, AlignProcessor, AlignTextConfig, AlignVisionConfig
|
from transformers import AlignConfig, AlignProcessor, AlignTextConfig, AlignVisionConfig
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
is_flax_available,
|
|
||||||
require_torch,
|
require_torch,
|
||||||
require_vision,
|
require_vision,
|
||||||
slow,
|
slow,
|
||||||
@@ -56,10 +55,6 @@ if is_vision_available():
     from PIL import Image
 
 
-if is_flax_available():
-    pass
-
-
 class AlignVisionModelTester:
     def __init__(
         self,
@@ -215,9 +210,11 @@ class AlignVisionModelTest(ModelTesterMixin, unittest.TestCase):
 
         check_hidden_states_output(inputs_dict, config, model_class)
 
+    @unittest.skip
     def test_training(self):
         pass
 
+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass
 
@@ -355,9 +352,11 @@ class AlignTextModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
+    @unittest.skip
     def test_training(self):
         pass
 
+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass
 
@@ -518,7 +517,7 @@ class AlignModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")
 
         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -178,9 +178,11 @@ class AltCLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
+    @unittest.skip
     def test_training(self):
         pass
 
+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass
 
@@ -309,7 +311,7 @@ class AltCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
     test_head_masking = False
 
     # TODO (@SunMarc): Fix me
-    @unittest.skip("It's broken.")
+    @unittest.skip(reason="It's broken.")
     def test_resize_tokens_embeddings(self):
         super().test_resize_tokens_embeddings()
 
@@ -324,9 +326,11 @@ class AltCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
+    @unittest.skip
     def test_training(self):
         pass
 
+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass
 
@@ -487,7 +491,7 @@ class AltCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
 
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")
 
         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -754,7 +754,7 @@ class BarkFineModelTest(ModelTesterMixin, unittest.TestCase):
         with torch.no_grad():
             model(**inputs)[0]
 
-    @unittest.skip("FineModel relies on codebook idx and does not return same logits")
+    @unittest.skip(reason="FineModel relies on codebook idx and does not return same logits")
     def test_inputs_embeds_matches_input_ids(self):
         pass
 
@@ -826,7 +826,7 @@ class BarkFineModelTest(ModelTesterMixin, unittest.TestCase):
         # resizing tokens_embeddings of a ModuleList
         original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         if not self.test_resize_embeddings:
-            return
+            self.skipTest(reason="test_resize_embeddings is False")
 
         for model_class in self.all_model_classes:
             config = copy.deepcopy(original_config)
@@ -877,7 +877,7 @@ class BarkFineModelTest(ModelTesterMixin, unittest.TestCase):
         # resizing tokens_embeddings of a ModuleList
         original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         if not self.test_resize_embeddings:
-            return
+            self.skipTest(reason="test_resize_embeddings is False")
 
         original_config.tie_word_embeddings = False
 
@@ -931,7 +931,7 @@ class BarkFineModelTest(ModelTesterMixin, unittest.TestCase):
     def test_flash_attn_2_inference_equivalence(self):
         for model_class in self.all_model_classes:
             if not model_class._supports_flash_attn_2:
-                return
+                self.skipTest(reason="Model does not support flash_attention_2")
 
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             model = model_class(config)
@@ -988,7 +988,7 @@ class BarkFineModelTest(ModelTesterMixin, unittest.TestCase):
     def test_flash_attn_2_inference_equivalence_right_padding(self):
         for model_class in self.all_model_classes:
             if not model_class._supports_flash_attn_2:
-                return
+                self.skipTest(reason="Model does not support flash_attention_2")
 
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             model = model_class(config)
@@ -1515,9 +1515,10 @@ class BartStandaloneDecoderModelTest(ModelTesterMixin, GenerationTesterMixin, un
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
 
+    @unittest.skip(reason="Decoder cannot keep gradients")
     def test_retain_grad_hidden_states_attentions(self):
-        # decoder cannot keep gradients
         return
 
+    @unittest.skip
     def test_save_load_fast_init_from_base(self):
         pass
@@ -147,6 +147,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
         self.assertTrue((input_ids[:, -1] == tokenizer.eos_token_id).all().item())
         self.assertTrue((labels[:, -1] == tokenizer.eos_token_id).all().item())
 
+    @unittest.skip
     def test_pretokenized_inputs(self):
         pass
 
@@ -75,7 +75,7 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -301,7 +301,7 @@ class BeitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 
     def test_training(self):
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
 
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.return_dict = True
@@ -325,7 +325,7 @@ class BeitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_training_gradient_checkpointing(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
 
         config.use_cache = False
         config.return_dict = True
@@ -614,7 +614,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
         for model_class in self.all_model_classes:
             # BertForMultipleChoice behaves incorrectly in JIT environments.
             if model_class == BertForMultipleChoice:
-                return
+                self.skipTest(reason="BertForMultipleChoice behaves incorrectly in JIT environments.")
 
             config.torchscript = True
             model = model_class(config=config)
@@ -79,7 +79,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -716,7 +716,7 @@ class BigBirdModelIntegrationTest(unittest.TestCase):
         """
 
         if not self.test_attention_probs:
-            return
+            self.skip("test_attention_probs is set to False")
 
         model = BigBirdModel.from_pretrained(
             "google/bigbird-roberta-base", attention_type="block_sparse", num_random_blocks=3, block_size=16
@@ -63,7 +63,7 @@ class BigBirdTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -335,14 +335,15 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
 
     def test_generate_without_input_ids(self):
         if self.model_tester.attention_type == "block_sparse":
-            # this test can never pass for BigBird-block-sparse attention since input_ids must be multiple of block_size
-            return
+            self.skipTest(
+                "Cannot pass for BigBird-block-sparse attention since input_ids must be multiple of block_size"
+            )
         super().test_generate_without_input_ids()
 
     def test_retain_grad_hidden_states_attentions(self):
         if self.model_tester.attention_type == "block_sparse":
             # this test can't pass since attention matrix (which is getting returned) can't have gradients (& just 0 at many locations)
-            return
+            self.skipTest(reason="Cannot pass since returned attention matrix can't have gradients")
         super().test_retain_grad_hidden_states_attentions()
 
     # BigBirdPegasusForSequenceClassification does not support inputs_embeds
@@ -811,6 +812,6 @@ class BigBirdPegasusStandaloneDecoderModelTest(ModelTesterMixin, GenerationTeste
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
 
+    @unittest.skip("Decoder cannot retain gradients")
     def test_retain_grad_hidden_states_attentions(self):
-        # decoder cannot keep gradients
         return
@@ -414,7 +414,7 @@ class BioGptModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
         result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
         self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
 
-    @unittest.skip("The `input_embeds` when fed don't produce the same results.")
+    @unittest.skip(reason="The `input_embeds` when fed don't produce the same results.")
     def test_beam_sample_generate(self):
         pass
 
@@ -565,6 +565,6 @@ class BlenderbotStandaloneDecoderModelTest(ModelTesterMixin, GenerationTesterMix
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
 
+    @unittest.skip(reason="decoder cannot keep gradients")
     def test_retain_grad_hidden_states_attentions(self):
-        # decoder cannot keep gradients
         return
@@ -564,6 +564,6 @@ class BlenderbotSmallStandaloneDecoderModelTest(ModelTesterMixin, GenerationTest
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
 
+    @unittest.skip(reason="decoder cannot keep gradients")
     def test_retain_grad_hidden_states_attentions(self):
-        # decoder cannot keep gradients
         return
@@ -130,18 +130,18 @@ class BlipImageProcessingTestFourChannels(ImageProcessingTestMixin, unittest.Tes
self.assertTrue(hasattr(image_processor, "image_std"))
self.assertTrue(hasattr(image_processor, "do_convert_rgb"))

-@unittest.skip("BlipImageProcessor does not support 4 channels yet") # FIXME Amy
+@unittest.skip(reason="BlipImageProcessor does not support 4 channels yet") # FIXME Amy
def test_call_numpy(self):
return super().test_call_numpy()

-@unittest.skip("BlipImageProcessor does not support 4 channels yet") # FIXME Amy
+@unittest.skip(reason="BlipImageProcessor does not support 4 channels yet") # FIXME Amy
def test_call_pytorch(self):
return super().test_call_torch()

-@unittest.skip("BLIP doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
+@unittest.skip(reason="BLIP doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
def test_call_pil(self):
pass

-@unittest.skip("BLIP doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
+@unittest.skip(reason="BLIP doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
def test_call_numpy_4_channels(self):
pass
@@ -193,9 +193,11 @@ class BlipVisionModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -335,9 +337,11 @@ class BlipTextModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -491,7 +495,7 @@ class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -932,7 +936,7 @@ class BlipTextRetrievalModelTest(ModelTesterMixin, unittest.TestCase):

def test_training(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="ModelTester is not setup for training")

for model_class in self.all_model_classes[:-1]:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -951,7 +955,7 @@ class BlipTextRetrievalModelTest(ModelTesterMixin, unittest.TestCase):

def test_training_gradient_checkpointing(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="ModelTester is not setup for training")

for model_class in self.all_model_classes[:-1]:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1008,7 +1012,7 @@ class BlipTextRetrievalModelTest(ModelTesterMixin, unittest.TestCase):

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -1160,7 +1164,7 @@ class BlipTextImageModelTest(ModelTesterMixin, unittest.TestCase):

def test_training(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="ModelTester is not setup for training")

for model_class in self.all_model_classes[:-1]:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1179,7 +1183,7 @@ class BlipTextImageModelTest(ModelTesterMixin, unittest.TestCase):

def test_training_gradient_checkpointing(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="ModelTester is not setup for training")

for model_class in self.all_model_classes[:-1]:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1224,7 +1228,7 @@ class BlipTextImageModelTest(ModelTesterMixin, unittest.TestCase):

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -141,9 +141,11 @@ class BlipTextModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -187,9 +187,11 @@ class Blip2VisionModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -389,7 +389,7 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bloom_weight_initialization(*config_and_inputs)

-@unittest.skip("Bloom has a non-standard KV cache format.")
+@unittest.skip(reason="Bloom has a non-standard KV cache format.")
def test_past_key_values_format(self):
pass

@@ -43,7 +43,7 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
kwargs.update(self.special_tokens_map)
return BloomTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)

-@unittest.skip("This needs a slow tokenizer. Bloom does not have one!")
+@unittest.skip(reason="This needs a slow tokenizer. Bloom does not have one!")
def test_encode_decode_with_spaces(self):
return

@@ -300,15 +300,15 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):

self.assertTrue(tokenizer.decode([255]) == "")

-# tokenizer does not have vocabulary
+@unittest.skip(reason="ByT5Tokenizer does not have a vocabulary")
def test_get_vocab(self):
pass

-# inputs cannot be pretokenized since ids depend on whole input string and not just on single characters
+@unittest.skip(reason="inputs cannot be pretokenized as ids depend on whole input string")
def test_pretokenized_inputs(self):
pass

-# tests all ids in vocab => vocab doesn't exist so unnecessary to test
+@unittest.skip(reason="ByT5Tokenizer does not have a vocabulary")
def test_conversion_reversible(self):
pass

@@ -94,7 +94,7 @@ class CamembertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):

def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
-return
+self.skipTest(reason="test_rust_tokenizer is set to False")

tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer()
@@ -441,7 +441,7 @@ class CanineModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

def test_headmasking(self):
if not self.test_head_masking:
-return
+self.skipTest(reason="test_head_masking is set to False")

global_rng.seed(42)
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -496,7 +496,7 @@ class CanineModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

check_attentions_validity(outputs.attentions)

-@unittest.skip("CANINE does not have a get_input_embeddings() method.")
+@unittest.skip(reason="CANINE does not have a get_input_embeddings() method.")
def test_inputs_embeds(self):
# ViT does not use inputs_embeds
pass
@@ -505,7 +505,7 @@ class CanineModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_inputs_embeds_matches_input_ids(self):
pass

-@unittest.skip("CANINE does not have a get_input_embeddings() method.")
+@unittest.skip(reason="CANINE does not have a get_input_embeddings() method.")
def test_model_get_set_embeddings(self):
pass

@@ -303,31 +303,32 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertListEqual(getattr(tokenizer, "additional_special_tokens"), [additional_special_token])
self.assertListEqual(getattr(tokenizer, "additional_special_tokens_ids"), [additional_special_token_id])

-# tokenizer has a fixed vocab_size (namely all possible unicode code points)
+@unittest.skip(reason="tokenizer has a fixed vocab_size (namely all possible unicode code points)")
def test_add_tokens_tokenizer(self):
pass

# CanineTokenizer does not support do_lower_case = True, as each character has its own Unicode code point
# ("b" and "B" for example have different Unicode code points)
+@unittest.skip(reason="CanineTokenizer does not support do_lower_case = True")
def test_added_tokens_do_lower_case(self):
pass

-# CanineModel does not support the get_input_embeddings nor the get_vocab method
+@unittest.skip(reason="CanineModel does not support the get_input_embeddings nor the get_vocab method")
def test_np_encode_plus_sent_to_model(self):
pass

-# CanineModel does not support the get_input_embeddings nor the get_vocab method
+@unittest.skip(reason="CanineModel does not support the get_input_embeddings nor the get_vocab method")
def test_torch_encode_plus_sent_to_model(self):
pass

-# tokenizer does not have vocabulary
+@unittest.skip(reason="CanineTokenizer does not have vocabulary")
def test_get_vocab(self):
pass

-# inputs cannot be pretokenized since ids depend on whole input string and not just on single characters
+@unittest.skip(reason="inputs cannot be pretokenized since ids depend on whole input string")
def test_pretokenized_inputs(self):
pass

-# tests all ids in vocab => vocab doesn't exist so unnecessary to test
+@unittest.skip(reason="CanineTokenizer does not have vocabulary")
def test_conversion_reversible(self):
pass
@@ -17,7 +17,7 @@
import unittest

from transformers.testing_utils import require_torch, require_vision
-from transformers.utils import is_torch_available, is_vision_available
+from transformers.utils import is_vision_available

from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs

@@ -26,10 +26,6 @@ if is_vision_available():
from transformers import ChineseCLIPImageProcessor


-if is_torch_available():
-pass


class ChineseCLIPImageProcessingTester(unittest.TestCase):
def __init__(
self,
@@ -125,7 +121,9 @@ class ChineseCLIPImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase
self.assertEqual(image_processor.size, {"shortest_edge": 42})
self.assertEqual(image_processor.crop_size, {"height": 84, "width": 84})

-@unittest.skip("ChineseCLIPImageProcessor doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
+@unittest.skip(
+reason="ChineseCLIPImageProcessor doesn't treat 4 channel PIL and numpy consistently yet"
+) # FIXME Amy
def test_call_numpy_4_channels(self):
pass

@@ -155,14 +153,16 @@ class ChineseCLIPImageProcessingTestFourChannels(ImageProcessingTestMixin, unitt
self.assertTrue(hasattr(image_processing, "image_std"))
self.assertTrue(hasattr(image_processing, "do_convert_rgb"))

-@unittest.skip("ChineseCLIPImageProcessor does not support 4 channels yet") # FIXME Amy
+@unittest.skip(reason="ChineseCLIPImageProcessor does not support 4 channels yet") # FIXME Amy
def test_call_numpy(self):
return super().test_call_numpy()

-@unittest.skip("ChineseCLIPImageProcessor does not support 4 channels yet") # FIXME Amy
+@unittest.skip(reason="ChineseCLIPImageProcessor does not support 4 channels yet") # FIXME Amy
def test_call_pytorch(self):
return super().test_call_torch()

-@unittest.skip("ChineseCLIPImageProcessor doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
+@unittest.skip(
+reason="ChineseCLIPImageProcessor doesn't treat 4 channel PIL and numpy consistently yet"
+) # FIXME Amy
def test_call_numpy_4_channels(self):
pass
@@ -388,9 +388,11 @@ class ChineseCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
model = ChineseCLIPTextModel.from_pretrained(model_name)
self.assertIsNotNone(model)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -466,9 +468,11 @@ class ChineseCLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -621,7 +625,7 @@ class ChineseCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -562,7 +562,7 @@ class ClapModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -220,9 +220,11 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_with_projection(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -381,9 +383,11 @@ class CLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_with_projection(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -535,7 +539,7 @@ class CLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -636,7 +640,7 @@ class CLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
fx_model_class_name = "Flax" + model_class.__name__

if not hasattr(transformers, fx_model_class_name):
-return
+self.skipTest(reason="No Flax model exists for this class")

fx_model_class = getattr(transformers, fx_model_class_name)

@@ -692,8 +696,7 @@ class CLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
fx_model_class_name = "Flax" + model_class.__name__

if not hasattr(transformers, fx_model_class_name):
-# no flax model exists for this class
-return
+self.skipTest(reason="No Flax model exists for this class")

fx_model_class = getattr(transformers, fx_model_class_name)

@@ -178,7 +178,6 @@ class CLIPTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_tokenization_python_rust_equals(self):
super().test_tokenization_python_rust_equals()

-# overwrite common test
+@unittest.skip(reason="CLIP always lower cases letters")
def test_added_tokens_do_lower_case(self):
-# CLIP always lower cases letters
pass
@@ -194,9 +194,11 @@ class CLIPSegVisionModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -331,9 +333,11 @@ class CLIPSegTextModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip
def test_training(self):
pass

+@unittest.skip
def test_training_gradient_checkpointing(self):
pass

@@ -540,7 +544,7 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)

def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
-return
+self.skipTest(reason="test_torchscript is set to False")

configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True
@@ -641,7 +645,7 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
fx_model_class_name = "Flax" + model_class.__name__

if not hasattr(transformers, fx_model_class_name):
-return
+self.skipTest(reason="No Flax model exists for this class")

fx_model_class = getattr(transformers, fx_model_class_name)

@@ -697,8 +701,7 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
fx_model_class_name = "Flax" + model_class.__name__

if not hasattr(transformers, fx_model_class_name):
-# no flax model exists for this class
-return
+self.skipTest(reason="No Flax model exists for this class")

fx_model_class = getattr(transformers, fx_model_class_name)

@@ -744,7 +747,7 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)

def test_training(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="Training test is skipped as the model was not trained")

for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -102,7 +102,7 @@ class ClvpTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.test_rust_and_python_full_tokenizers
def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
-return
+self.skipTest(reason="test_rust_tokenizer is set to False")

tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer(add_prefix_space=True)
@@ -26,7 +26,6 @@ from transformers import (
AddedToken,
CodeLlamaTokenizer,
CodeLlamaTokenizerFast,
-is_torch_available,
)
from transformers.convert_slow_tokenizer import convert_slow_tokenizer
from transformers.testing_utils import (
@@ -44,10 +43,6 @@ from ...test_tokenization_common import TokenizerTesterMixin
SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")


-if is_torch_available():
-pass


@require_sentencepiece
@require_tokenizers
class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@@ -220,7 +215,7 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@require_torch
def test_batch_tokenization(self):
if not self.test_seq2seq:
-return
+self.skipTest(reason="test_seq2seq is False")

tokenizers = self.get_tokenizers()
for tokenizer in tokenizers:
@@ -240,7 +235,7 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
return_tensors="pt",
)
except NotImplementedError:
-return
+self.skipTest(reason="Encountered NotImplementedError when calling tokenizer")
self.assertEqual(batch.input_ids.shape[1], 3)
# max_target_length will default to max_length if not specified
batch = tokenizer(text, max_length=3, return_tensors="pt")
@@ -251,7 +246,7 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(batch_encoder_only.attention_mask.shape[1], 3)
self.assertNotIn("decoder_input_ids", batch_encoder_only)

-@unittest.skip("Unfortunately way too slow to build a BPE with SentencePiece.")
+@unittest.skip(reason="Unfortunately way too slow to build a BPE with SentencePiece.")
def test_save_slow_from_fast_and_reload_fast(self):
pass

@@ -306,11 +301,11 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
pickled_tokenizer = pickle.dumps(tokenizer)
pickle.loads(pickled_tokenizer)

-@unittest.skip("worker 'gw4' crashed on CI, passing locally.")
+@unittest.skip(reason="worker 'gw4' crashed on CI, passing locally.")
def test_pickle_subword_regularization_tokenizer(self):
pass

-@unittest.skip("worker 'gw4' crashed on CI, passing locally.")
+@unittest.skip(reason="worker 'gw4' crashed on CI, passing locally.")
def test_subword_regularization_tokenizer(self):
pass

@@ -99,7 +99,7 @@ class CodeGenTokenizationTest(TokenizerTesterMixin, unittest.TestCase):

def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
-return
+self.skipTest(reason="test_rust_tokenizer is set to False")

tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer(add_prefix_space=True)
@@ -127,6 +127,7 @@ class CodeGenTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
input_bpe_tokens = [14, 15, 10, 9, 3, 2, 15, 19]
self.assertListEqual(rust_tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)

+@unittest.skip
def test_pretokenized_inputs(self, *args, **kwargs):
# It's very difficult to mix/test pretokenization with byte-level
# And get both CodeGen and Roberta to work at the same time (mostly an issue of adding a space before the string)
@@ -262,6 +263,7 @@ class CodeGenTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# TODO @ArthurZ outputs of the fast tokenizer are different in this case, un-related to the PR

# tokenizer has no padding token
+@unittest.skip(reason="tokenizer has no padding token")
def test_padding_different_model_input_name(self):
pass

@@ -51,7 +51,7 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_torch_encode_plus_sent_to_model(self):
super().test_torch_encode_plus_sent_to_model()

-@unittest.skip("This needs a slow tokenizer. Cohere does not have one!")
+@unittest.skip(reason="This needs a slow tokenizer. Cohere does not have one!")
def test_encode_decode_with_spaces(self):
return

@@ -263,8 +263,8 @@ class ConditionalDetrModelTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
pass

@slow
+@unittest.skip(reason="TODO Niels: fix me!")
def test_model_outputs_equivalence(self):
-# TODO Niels: fix me!
pass

def test_attention_outputs(self):
@@ -433,7 +433,7 @@ class ConvBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
for model_class in self.all_model_classes:
# ConvBertForMultipleChoice behaves incorrectly in JIT environments.
if model_class == ConvBertForMultipleChoice:
-return
+self.skipTest(reason="ConvBertForMultipleChoice behaves incorrectly in JIT environments.")

config.torchscript = True
model = model_class(config=config)
@@ -216,7 +216,7 @@ class ConvNextV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa

def test_training(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="ModelTester is not set to test training")

for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_with_labels()
@@ -237,7 +237,7 @@ class ConvNextV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa

def test_training_gradient_checkpointing(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="ModelTester is not set to test training")

for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_with_labels()
@@ -154,7 +154,7 @@ class CpmAntModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
self.config_tester.run_common_tests()

def test_inputs_embeds(self):
-unittest.skip("CPMAnt doesn't support input_embeds.")(self.test_inputs_embeds)
+unittest.skip(reason="CPMAnt doesn't support input_embeds.")(self.test_inputs_embeds)

def test_retain_grad_hidden_states_attentions(self):
unittest.skip(
@@ -426,22 +426,19 @@ class Data2VecAudioModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Tes
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_labels_out_of_vocab(*config_and_inputs)

-# Data2VecAudio has no inputs_embeds
+@unittest.skip(reason="Data2VecAudio has no inputs_embeds")
def test_inputs_embeds(self):
pass

-# `input_ids` is renamed to `input_values`
+@unittest.skip(reason="`input_ids` is renamed to `input_values`")
def test_forward_signature(self):
pass

-# Data2VecAudio cannot resize token embeddings
-# since it has no tokens embeddings
+@unittest.skip(reason="Data2VecAudio has no tokens embeddings")
def test_resize_tokens_embeddings(self):
pass

-# Data2VecAudio has no inputs_embeds
-# and thus the `get_input_embeddings` fn
-# is not implemented
+@unittest.skip(reason="Data2VecAudio has no inputs_embeds")
def test_model_get_set_embeddings(self):
pass

@@ -196,8 +196,8 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
def test_config(self):
self.config_tester.run_common_tests()

+@unittest.skip(reason="Data2VecVision does not use inputs_embeds")
def test_inputs_embeds(self):
-# Data2VecVision does not use inputs_embeds
pass

@require_torch_multi_gpu
@@ -226,7 +226,7 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te

def test_training(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="model_tester.is_training is set to False")

config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
@@ -245,7 +245,7 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
def test_training_gradient_checkpointing(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
if not self.model_tester.is_training:
-return
+self.skipTest(reason="model_tester.is_training is set to False")

config.use_cache = False
config.return_dict = True
@@ -350,21 +350,21 @@ class DbrxModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
model = DbrxModel.from_pretrained(model_name)
self.assertIsNotNone(model)

-@unittest.skip("Dbrx models have weight tying disabled.")
+@unittest.skip(reason="Dbrx models have weight tying disabled.")
def test_tied_weights_keys(self):
pass

# Offload does not work with Dbrx models because of the forward of DbrxExperts where we chunk the experts.
# The issue is that the offloaded weights of the mlp layer are still on meta device (w1_chunked, v1_chunked, w2_chunked)
-@unittest.skip("Dbrx models do not work with offload")
+@unittest.skip(reason="Dbrx models do not work with offload")
def test_cpu_offload(self):
pass

-@unittest.skip("Dbrx models do not work with offload")
+@unittest.skip(reason="Dbrx models do not work with offload")
def test_disk_offload_safetensors(self):
pass

-@unittest.skip("Dbrx models do not work with offload")
+@unittest.skip(reason="Dbrx models do not work with offload")
def test_disk_offload_bin(self):
pass

@@ -79,11 +79,11 @@ class DebertaV2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):

self.assertListEqual(rust_tokens, tokens_target)

-@unittest.skip("There is an inconsistency between slow and fast tokenizer due to a bug in the fast one.")
+@unittest.skip(reason="There is an inconsistency between slow and fast tokenizer due to a bug in the fast one.")
def test_sentencepiece_tokenize_and_convert_tokens_to_string(self):
pass

-@unittest.skip("There is an inconsistency between slow and fast tokenizer due to a bug in the fast one.")
+@unittest.skip(reason="There is an inconsistency between slow and fast tokenizer due to a bug in the fast one.")
def test_sentencepiece_tokenize_and_decode(self):
pass

@@ -606,15 +606,15 @@ class DeformableDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)

-@unittest.skip("No support for low_cpu_mem_usage=True.")
+@unittest.skip(reason="No support for low_cpu_mem_usage=True.")
def test_save_load_low_cpu_mem_usage(self):
pass

-@unittest.skip("No support for low_cpu_mem_usage=True.")
+@unittest.skip(reason="No support for low_cpu_mem_usage=True.")
def test_save_load_low_cpu_mem_usage_checkpoints(self):
pass

-@unittest.skip("No support for low_cpu_mem_usage=True.")
+@unittest.skip(reason="No support for low_cpu_mem_usage=True.")
def test_save_load_low_cpu_mem_usage_no_safetensors(self):
pass

@@ -274,7 +274,7 @@ class DeiTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

def test_training(self):
if not self.model_tester.is_training:
-return
+self.skipTest(reason="model_tester.is_training is set to False")

config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
@@ -296,7 +296,7 @@ class DeiTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
if not self.model_tester.is_training:
-return
+self.skipTest(reason="model_tester.is_training is set to False")

config.use_cache = False
config.return_dict = True
@@ -263,8 +263,8 @@ class DetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
pass

@slow
+@unittest.skip(reason="TODO Niels: fix me!")
def test_model_outputs_equivalence(self):
-# TODO Niels: fix me!
pass

def test_attention_outputs(self):
@@ -256,7 +256,7 @@ class DinatModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
self.assertTrue(x is None or isinstance(x, nn.Linear))

def test_attention_outputs(self):
-self.skipTest("Dinat's attention operation is handled entirely by NATTEN.")
+self.skipTest(reason="Dinat's attention operation is handled entirely by NATTEN.")

def check_hidden_states_output(self, inputs_dict, config, model_class, image_size):
model = model_class(config)
@@ -281,7 +281,7 @@ class DistilBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
for model_class in self.all_model_classes:
# BertForMultipleChoice behaves incorrectly in JIT environments.
if model_class == DistilBertForMultipleChoice:
-return
+self.skipTest(reason="DistilBertForMultipleChoice behaves incorrectly in JIT environments.")

config.torchscript = True
model = model_class(config=config)
@@ -168,8 +168,8 @@ class DonutSwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)

+@unittest.skip(reason="DonutSwin does not use inputs_embeds")
def test_inputs_embeds(self):
-# DonutSwin does not use inputs_embeds
pass

def test_model_get_set_embeddings(self):
@@ -78,7 +78,7 @@ class ElectraTokenizationTest(TokenizerTesterMixin, unittest.TestCase):

def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
-return
+self.skipTest(reason="test_rust_tokenizer is set to False")

tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer()
@ -178,29 +178,35 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
|||||||
expected_arg_names = ["input_values", "padding_mask", "bandwidth"]
|
expected_arg_names = ["input_values", "padding_mask", "bandwidth"]
|
||||||
self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
|
self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
|
||||||
|
|
||||||
@unittest.skip("The EncodecModel is not transformers based, thus it does not have `inputs_embeds` logics")
|
@unittest.skip(reason="The EncodecModel is not transformers based, thus it does not have `inputs_embeds` logics")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("The EncodecModel is not transformers based, thus it does not have `inputs_embeds` logics")
|
@unittest.skip(reason="The EncodecModel is not transformers based, thus it does not have `inputs_embeds` logics")
|
||||||
def test_model_get_set_embeddings(self):
|
def test_model_get_set_embeddings(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("The EncodecModel is not transformers based, thus it does not have the usual `attention` logic")
|
@unittest.skip(
|
||||||
|
reason="The EncodecModel is not transformers based, thus it does not have the usual `attention` logic"
|
||||||
|
)
|
||||||
def test_retain_grad_hidden_states_attentions(self):
|
     def test_retain_grad_hidden_states_attentions(self):
         pass

-    @unittest.skip("The EncodecModel is not transformers based, thus it does not have the usual `attention` logic")
+    @unittest.skip(
+        reason="The EncodecModel is not transformers based, thus it does not have the usual `attention` logic"
+    )
     def test_torchscript_output_attentions(self):
         pass

-    @unittest.skip("The EncodecModel is not transformers based, thus it does not have the usual `hidden_states` logic")
+    @unittest.skip(
+        reason="The EncodecModel is not transformers based, thus it does not have the usual `hidden_states` logic"
+    )
     def test_torchscript_output_hidden_state(self):
         pass

     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -288,7 +294,9 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         # (Even with this call, there are still memory leak by ~0.04MB)
         self.clear_torch_jit_class_registry()

-    @unittest.skip("The EncodecModel is not transformers based, thus it does not have the usual `attention` logic")
+    @unittest.skip(
+        reason="The EncodecModel is not transformers based, thus it does not have the usual `attention` logic"
+    )
     def test_attention_outputs(self):
         pass

@@ -321,19 +329,21 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         hidden_states_with_chunk = model(**inputs)[0]
         self.assertTrue(torch.allclose(hidden_states_no_chunk, hidden_states_with_chunk, atol=1e-3))

-    @unittest.skip("The EncodecModel is not transformers based, thus it does not have the usual `hidden_states` logic")
+    @unittest.skip(
+        reason="The EncodecModel is not transformers based, thus it does not have the usual `hidden_states` logic"
+    )
     def test_hidden_states_output(self):
         pass

-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
     def test_save_load_low_cpu_mem_usage(self):
         pass

-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
     def test_save_load_low_cpu_mem_usage_checkpoints(self):
         pass

-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
     def test_save_load_low_cpu_mem_usage_no_safetensors(self):
         pass

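Aside (illustrative sketch, not part of the patch): the hunks above converge on two idioms, and a minimal, self-contained example of both is shown below. `DummyModelTest` and its `test_torchscript` flag are made-up stand-ins. The point is that `unittest.skip` carries the explanation through its `reason` argument, and that `self.skipTest(reason=...)` makes the runner report the test as skipped with that reason, where a bare `return` would silently count as a pass.

```python
import unittest


class DummyModelTest(unittest.TestCase):
    # Hypothetical flag standing in for the mixin attribute used in the real suites.
    test_torchscript = False

    @unittest.skip(reason="this model has no usual `attention` logic")
    def test_attention_outputs(self):
        pass

    def test_torchscript_simple(self):
        if not self.test_torchscript:
            # Reported as a skip (with this reason); a bare `return` would count as a pass.
            self.skipTest(reason="test_torchscript is set to False")
        self.fail("never reached while the flag is False")


if __name__ == "__main__":
    unittest.main()
```

Running this with `python -m unittest -v` lists both tests as skipped together with their reasons.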
@@ -1005,6 +1005,7 @@ class GPT2EncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
             "google-bert/bert-base-cased", "openai-community/gpt2"
         )

+    @unittest.skip
     def test_encoder_decoder_model_shared_weights(self):
         pass

@@ -1079,6 +1080,7 @@ class ProphetNetEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
             "google-bert/bert-large-uncased", "microsoft/prophetnet-large-uncased"
         )

+    @unittest.skip
     def test_encoder_decoder_model_shared_weights(self):
         pass

@@ -1135,6 +1137,7 @@ class BartEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
             "google-bert/bert-large-uncased", "facebook/bart-large"
         )

+    @unittest.skip
     def test_encoder_decoder_model_shared_weights(self):
         pass

@@ -577,9 +577,8 @@ class ErnieModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     def test_torchscript_device_change(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
-            # ErnieForMultipleChoice behaves incorrectly in JIT environments.
             if model_class == ErnieForMultipleChoice:
-                return
+                self.skipTest(reason="ErnieForMultipleChoice behaves incorrectly in JIT environments.")

             config.torchscript = True
             model = model_class(config=config)
@@ -290,11 +290,11 @@ class EsmModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         self.assertEqual(position_ids.shape, expected_positions.shape)
         self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))

-    @unittest.skip("Esm does not support embedding resizing")
+    @unittest.skip(reason="Esm does not support embedding resizing")
     def test_resize_embeddings_untied(self):
         pass

-    @unittest.skip("Esm does not support embedding resizing")
+    @unittest.skip(reason="Esm does not support embedding resizing")
     def test_resize_tokens_embeddings(self):
         pass

@@ -184,7 +184,7 @@ class EsmFoldModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)

-    @unittest.skip("Does not support attention outputs")
+    @unittest.skip(reason="Does not support attention outputs")
     def test_attention_outputs(self):
         pass

@@ -192,75 +192,77 @@ class EsmFoldModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def test_correct_missing_keys(self):
         pass

-    @unittest.skip("Esm does not support embedding resizing")
+    @unittest.skip(reason="Esm does not support embedding resizing")
     def test_resize_embeddings_untied(self):
         pass

-    @unittest.skip("Esm does not support embedding resizing")
+    @unittest.skip(reason="Esm does not support embedding resizing")
     def test_resize_tokens_embeddings(self):
         pass

-    @unittest.skip("ESMFold does not support passing input embeds!")
+    @unittest.skip(reason="ESMFold does not support passing input embeds!")
     def test_inputs_embeds(self):
         pass

-    @unittest.skip("ESMFold does not support head pruning.")
+    @unittest.skip(reason="ESMFold does not support head pruning.")
     def test_head_pruning(self):
         pass

-    @unittest.skip("ESMFold does not support head pruning.")
+    @unittest.skip(reason="ESMFold does not support head pruning.")
     def test_head_pruning_integration(self):
         pass

-    @unittest.skip("ESMFold does not support head pruning.")
+    @unittest.skip(reason="ESMFold does not support head pruning.")
     def test_head_pruning_save_load_from_config_init(self):
         pass

-    @unittest.skip("ESMFold does not support head pruning.")
+    @unittest.skip(reason="ESMFold does not support head pruning.")
     def test_head_pruning_save_load_from_pretrained(self):
         pass

-    @unittest.skip("ESMFold does not support head pruning.")
+    @unittest.skip(reason="ESMFold does not support head pruning.")
     def test_headmasking(self):
         pass

-    @unittest.skip("ESMFold does not output hidden states in the normal way.")
+    @unittest.skip(reason="ESMFold does not output hidden states in the normal way.")
     def test_hidden_states_output(self):
         pass

-    @unittest.skip("ESMfold does not output hidden states in the normal way.")
+    @unittest.skip(reason="ESMfold does not output hidden states in the normal way.")
     def test_retain_grad_hidden_states_attentions(self):
         pass

-    @unittest.skip("ESMFold only has one output format.")
+    @unittest.skip(reason="ESMFold only has one output format.")
     def test_model_outputs_equivalence(self):
         pass

-    @unittest.skip("This test doesn't work for ESMFold and doesn't test core functionality")
+    @unittest.skip(reason="This test doesn't work for ESMFold and doesn't test core functionality")
     def test_save_load_fast_init_from_base(self):
         pass

-    @unittest.skip("ESMFold does not support input chunking.")
+    @unittest.skip(reason="ESMFold does not support input chunking.")
     def test_feed_forward_chunking(self):
         pass

-    @unittest.skip("ESMFold doesn't respect you and it certainly doesn't respect your initialization arguments.")
+    @unittest.skip(
+        reason="ESMFold doesn't respect you and it certainly doesn't respect your initialization arguments."
+    )
     def test_initialization(self):
         pass

-    @unittest.skip("ESMFold doesn't support torchscript compilation.")
+    @unittest.skip(reason="ESMFold doesn't support torchscript compilation.")
     def test_torchscript_output_attentions(self):
         pass

-    @unittest.skip("ESMFold doesn't support torchscript compilation.")
+    @unittest.skip(reason="ESMFold doesn't support torchscript compilation.")
     def test_torchscript_output_hidden_state(self):
         pass

-    @unittest.skip("ESMFold doesn't support torchscript compilation.")
+    @unittest.skip(reason="ESMFold doesn't support torchscript compilation.")
     def test_torchscript_simple(self):
         pass

-    @unittest.skip("ESMFold doesn't support data parallel.")
+    @unittest.skip(reason="ESMFold doesn't support data parallel.")
     def test_multi_gpu_data_parallel_forward(self):
         pass

@@ -381,7 +381,7 @@ class FalconModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix

         # If it doesn't support cache, pass the test
         if not hasattr(config, "use_cache"):
-            return
+            self.skipTest(reason="Model does not support cache")

         model = model_class(config).to(torch_device)
         if "use_cache" not in inputs:
@@ -390,7 +390,7 @@ class FalconModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix

         # If "past_key_values" is not returned, pass the test (e.g. RWKV uses a different cache name and format)
         if "past_key_values" not in outputs:
-            return
+            self.skipTest(reason="Model does not return past_key_values")

         num_hidden_layers = (
             getattr(config, "decoder_layers", None)
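Another aside rather than part of the change set: when a guard can be evaluated at import or collection time, `unittest.skipIf` states it declaratively; when it depends on values only known inside the test body (as with the `use_cache` / `past_key_values` checks above), `self.skipTest(reason=...)` is the in-body equivalent. All names in this sketch are hypothetical.

```python
import unittest

# Hypothetical capability flag; a real suite would compute this from the model or config.
HAS_CACHE_SUPPORT = False


class DummyCacheFormatTest(unittest.TestCase):
    @unittest.skipIf(not HAS_CACHE_SUPPORT, reason="Model does not support cache")
    def test_cache_format(self):
        pass

    def test_past_key_values(self):
        outputs = {}  # stand-in for the dict a model forward pass would return
        if "past_key_values" not in outputs:
            # Only known at run time, so the in-body skip is the right tool here.
            self.skipTest(reason="Model does not return past_key_values")
```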
@@ -174,7 +174,7 @@ class FastSpeech2ConformerTokenizerTest(TokenizerTesterMixin, unittest.TestCase)
     def test_convert_tokens_to_string_format(self):
         pass

-    @unittest.skip("FastSpeech2Conformer tokenizer does not support pairs.")
+    @unittest.skip(reason="FastSpeech2Conformer tokenizer does not support pairs.")
     def test_maximum_encoding_length_pair_input(self):
         pass

@@ -477,7 +477,7 @@ class FlaubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
         for model_class in self.all_model_classes:
             # FlauBertForMultipleChoice behaves incorrectly in JIT environments.
             if model_class == FlaubertForMultipleChoice:
-                return
+                self.skipTest(reason="FlauBertForMultipleChoice behaves incorrectly in JIT environments.")

             config.torchscript = True
             model = model_class(config=config)
@@ -176,8 +176,8 @@ class FlavaImageModelTest(ModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()

+    @unittest.skip("Flava does not use input_ids")
     def test_inputs_embeds(self):
-        # FLAVA does not use inputs_embeds
         pass

     def test_model_get_set_embeddings(self):
@@ -300,9 +300,11 @@ class FlavaImageModelTest(ModelTesterMixin, unittest.TestCase):

         check_hidden_states_output(inputs_dict, config, model_class)

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -318,13 +320,13 @@ class FlavaImageModelTest(ModelTesterMixin, unittest.TestCase):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass

-    # skip this test as FlavaImageModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaImageModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_from_base(self):
         pass

     # skip this test as FlavaImageModel has no base class and is
     # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaImageModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_to_base(self):
         pass

@@ -459,9 +461,11 @@ class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -477,17 +481,16 @@ class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass

+    @unittest.skip(reason="FLAVA does not use input_embeds")
     def test_inputs_embeds(self):
         # FLAVA does not use inputs_embeds
         pass

-    # skip this test as FlavaTextModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaTextModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_from_base(self):
         pass

-    # skip this test as FlavaTextModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaTextModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_to_base(self):
         pass

@@ -619,13 +622,15 @@ class FlavaMultimodalModelTest(ModelTesterMixin, unittest.TestCase):
         expected_arg_names = ["hidden_states"]
         self.assertListEqual(arg_names[:1], expected_arg_names)

+    @unittest.skip("FLAVA does not have input embeddings")
     def test_model_get_set_embeddings(self):
-        # No embedding in multimodal model
         pass

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -641,17 +646,15 @@ class FlavaMultimodalModelTest(ModelTesterMixin, unittest.TestCase):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass

+    @unittest.skip(reason="FLAVA does not use input_embeds")
     def test_inputs_embeds(self):
-        # FLAVA does not use inputs_embeds
         pass

-    # skip this test as FlavaMultimodalModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaMultimodalModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_from_base(self):
         pass

-    # skip this test as FlavaMultimodalModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaMultimodalModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_to_base(self):
         pass

@@ -742,20 +745,23 @@ class FlavaImageCodebookTest(ModelTesterMixin, unittest.TestCase):
     def test_attention_outputs(self):
         pass

+    @unittest.skip(reason="No embedding in multimodal model")
     def test_model_get_set_embeddings(self):
-        # No embedding in multimodal model
         pass

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_hidden_states_output(self):
         pass

+    @unittest.skip(reason="FlavaImageCodebook has no attentions")
     def test_retain_grad_hidden_states_attentions(self):
-        # no attentions
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -771,20 +777,19 @@ class FlavaImageCodebookTest(ModelTesterMixin, unittest.TestCase):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass

+    @unittest.skip(reason="FLAVA does not use input_embeds")
     def test_inputs_embeds(self):
-        # FLAVA does not use inputs_embeds
         pass

+    @unittest.skip
     def test_model_outputs_equivalence(self):
         pass

-    # skip this test as FlavaImageCodebook has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaImageCodebook has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_from_base(self):
         pass

-    # skip this test as FlavaImageCodebook has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="FlavaImageCodebook has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_to_base(self):
         pass

@@ -931,19 +936,19 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
         self.model_tester.create_and_check_model(*config_and_inputs)

-    # hidden_states are tested in individual model tests
+    @unittest.skip(reason="tested in individual model tests")
     def test_hidden_states_output(self):
         pass

-    # input_embeds are tested in individual model tests
+    @unittest.skip(reason="tested in individual model tests")
     def test_inputs_embeds(self):
         pass

-    # tested in individual model tests
+    @unittest.skip(reason="tested in individual model tests")
     def test_retain_grad_hidden_states_attentions(self):
         pass

-    # FlavaModel does not have input/output embeddings
+    @unittest.skip(reason="FlavaModel does not have input/output embeddings")
     def test_model_get_set_embeddings(self):
         pass

@@ -973,7 +978,7 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -321,6 +321,7 @@ class FNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         return inputs_dict

     # Overriden Tests
+    @unittest.skip
     def test_attention_outputs(self):
         pass

@@ -69,7 +69,7 @@ class FNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):

     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")

         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -194,7 +194,7 @@ class FNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding(self, max_length=50):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")

         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -263,7 +263,7 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
             model2, info = model_class.from_pretrained(tmpdirname, output_loading_info=True)
             self.assertEqual(info["missing_keys"], [])

-    @unittest.skip("Test has a segmentation fault on torch 1.8.0")
+    @unittest.skip(reason="Test has a segmentation fault on torch 1.8.0")
     def test_export_to_onnx(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs()
         model = FSMTModel(config).to(torch_device)
@@ -312,23 +312,23 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
             2,
         )

-    @unittest.skip("can't be implemented for FSMT due to dual vocab.")
+    @unittest.skip(reason="can't be implemented for FSMT due to dual vocab.")
     def test_resize_tokens_embeddings(self):
         pass

-    @unittest.skip("Passing inputs_embeds not implemented for FSMT.")
+    @unittest.skip(reason="Passing inputs_embeds not implemented for FSMT.")
     def test_inputs_embeds(self):
         pass

-    @unittest.skip("Input ids is required for FSMT.")
+    @unittest.skip(reason="Input ids is required for FSMT.")
     def test_inputs_embeds_matches_input_ids(self):
         pass

-    @unittest.skip("model weights aren't tied in FSMT.")
+    @unittest.skip(reason="model weights aren't tied in FSMT.")
     def test_tie_model_weights(self):
         pass

-    @unittest.skip("TODO: Decoder embeddings cannot be resized at the moment")
+    @unittest.skip(reason="TODO: Decoder embeddings cannot be resized at the moment")
     def test_resize_embeddings_untied(self):
         pass

@@ -582,7 +582,7 @@ class TestSinusoidalPositionalEmbeddings(unittest.TestCase):
         # odd num_embeddings is allowed
         SinusoidalPositionalEmbedding(num_positions=5, embedding_dim=4, padding_idx=self.padding_idx).to(torch_device)

-    @unittest.skip("different from marian (needs more research)")
+    @unittest.skip(reason="different from marian (needs more research)")
     def test_positional_emb_weights_against_marian(self):
         desired_weights = torch.tensor(
             [
@@ -160,10 +160,10 @@ class FSMTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         expected = ["us", "a</w>", "is</w>", "un", "i", "ted</w>", "st", "ates</w>", "of</w>", "am", "er", "ica</w>"]
         self.assertListEqual(tokens, expected)

-    @unittest.skip("FSMTConfig.__init__ requires non-optional args")
+    @unittest.skip(reason="FSMTConfig.__init__ requires non-optional args")
     def test_torch_encode_plus_sent_to_model(self):
         pass

-    @unittest.skip("FSMTConfig.__init__ requires non-optional args")
+    @unittest.skip(reason="FSMTConfig.__init__ requires non-optional args")
     def test_np_encode_plus_sent_to_model(self):
         pass

@@ -295,17 +295,17 @@ class FuyuModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         pass

     # TODO: Fix me (once this model gets more usage)
-    @unittest.skip("Does not work on the tiny model.")
+    @unittest.skip(reason="Does not work on the tiny model.")
     def test_disk_offload_bin(self):
         super().test_disk_offload()

     # TODO: Fix me (once this model gets more usage)
-    @unittest.skip("Does not work on the tiny model.")
+    @unittest.skip(reason="Does not work on the tiny model.")
     def test_disk_offload_safetensors(self):
         super().test_disk_offload()

     # TODO: Fix me (once this model gets more usage)
-    @unittest.skip("Does not work on the tiny model.")
+    @unittest.skip(reason="Does not work on the tiny model.")
     def test_model_parallelism(self):
         super().test_model_parallelism()

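A small illustrative sketch (the class name and reason string are placeholders, not from the patch): the recurring rewrite in the Flava, FNet and Hubert hunks turns an explanatory comment above an empty override into the skip reason itself, so the explanation survives into the test report instead of living only in the source.

```python
import unittest


class DummyOverrideTest(unittest.TestCase):
    # Before: the explanation is a comment and the empty body is reported as a pass.
    #
    #     # skip this test as the model has no base class
    #     def test_save_load_fast_init_from_base(self):
    #         pass

    # After: the same explanation rides along as the skip reason.
    @unittest.skip(reason="model has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_from_base(self):
        pass
```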
@@ -398,11 +398,11 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
             (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels),
         )

-    @unittest.skip("Gemma buffers include complex numbers, which breaks this test")
+    @unittest.skip(reason="Gemma buffers include complex numbers, which breaks this test")
     def test_save_load_fast_init_from_base(self):
         pass

-    @unittest.skip("Gemma uses GQA on all models so the KV cache is a non standard format")
+    @unittest.skip(reason="Gemma uses GQA on all models so the KV cache is a non standard format")
     def test_past_key_values_format(self):
         pass

@@ -456,7 +456,7 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     @pytest.mark.flash_attn_test
     @slow
     def test_flash_attn_2_inference_equivalence_right_padding(self):
-        self.skipTest("Gemma flash attention does not support right padding")
+        self.skipTest(reason="Gemma flash attention does not support right padding")

     @require_torch_sdpa
     @require_torch_gpu
@@ -464,7 +464,7 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     def test_sdpa_equivalence(self):
         for model_class in self.all_model_classes:
             if not model_class._supports_sdpa:
-                return
+                self.skipTest(reason="Model does not support SDPA")

             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             model = model_class(config)
@@ -498,7 +498,7 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     def test_flash_attn_2_equivalence(self):
         for model_class in self.all_model_classes:
             if not model_class._supports_flash_attn_2:
-                return
+                self.skipTest(reason="Model does not support Flash Attention 2")

             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             model = model_class(config)
@@ -749,7 +749,7 @@ class GemmaIntegrationTest(unittest.TestCase):

         self.assertEqual(output_text, EXPECTED_TEXTS)

-    @unittest.skip("The test will not fit our CI runners")
+    @unittest.skip(reason="The test will not fit our CI runners")
     @require_read_token
     def test_model_7b_fp32(self):
         model_id = "google/gemma-7b"
@@ -877,7 +877,7 @@ class GemmaIntegrationTest(unittest.TestCase):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
         # work as intended. See https://github.com/pytorch/pytorch/issues/121943
         if version.parse(torch.__version__) < version.parse("2.3.0"):
-            self.skipTest("This test requires torch >= 2.3 to run.")
+            self.skipTest(reason="This test requires torch >= 2.3 to run.")

         NUM_TOKENS_TO_GENERATE = 40
         # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
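One more hedged sketch, not taken from the diff: the torch version gate above, written as a standalone pattern. The test class and the `MIN_TORCH` constant are invented for illustration; the `version.parse` comparison mirrors the hunk above, with `version` imported here from `packaging`, the usual source of that helper.

```python
import unittest

import torch
from packaging import version

MIN_TORCH = version.parse("2.3.0")  # assumed minimum version for the compiled-cache check


class DummyCompileTest(unittest.TestCase):
    def test_compile_static_cache(self):
        if version.parse(torch.__version__) < MIN_TORCH:
            self.skipTest(reason="This test requires torch >= 2.3 to run.")
        # ... the actual compilation / generation checks would follow here
```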
@@ -23,7 +23,6 @@ from transformers import (
     AddedToken,
     GemmaTokenizer,
     GemmaTokenizerFast,
-    is_torch_available,
 )
 from transformers.convert_slow_tokenizer import convert_slow_tokenizer
 from transformers.testing_utils import (
@@ -43,10 +42,6 @@ from ...test_tokenization_common import TokenizerTesterMixin
 SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")


-if is_torch_available():
-    pass
-
-
 @require_sentencepiece
 @require_tokenizers
 class GemmaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@@ -68,7 +63,7 @@ class GemmaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     @require_torch
     def test_batch_tokenization(self):
         if not self.test_seq2seq:
-            return
+            self.skipTest(reason="test_seq2seq is set to False")

         tokenizers = self.get_tokenizers()
         for tokenizer in tokenizers:
@@ -88,7 +83,7 @@ class GemmaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                         return_tensors="pt",
                     )
                 except NotImplementedError:
-                    return
+                    self.skipTest(reason="Encountered NotImplementedError when calling tokenizer")
                 self.assertEqual(batch.input_ids.shape[1], 3)
                 # max_target_length will default to max_length if not specified
                 batch = tokenizer(text, max_length=3, return_tensors="pt")
@@ -99,7 +94,7 @@ class GemmaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 self.assertEqual(batch_encoder_only.attention_mask.shape[1], 3)
                 self.assertNotIn("decoder_input_ids", batch_encoder_only)

-    @unittest.skip("Unfortunately way too slow to build a BPE with SentencePiece.")
+    @unittest.skip(reason="Unfortunately way too slow to build a BPE with SentencePiece.")
     def test_save_slow_from_fast_and_reload_fast(self):
         pass

@@ -147,15 +142,15 @@ class GemmaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 padding=False,
             )

-    @unittest.skip("worker 'gw4' crashed on CI, passing locally.")
+    @unittest.skip(reason="worker 'gw4' crashed on CI, passing locally.")
     def test_pickle_subword_regularization_tokenizer(self):
         pass

-    @unittest.skip("worker 'gw4' crashed on CI, passing locally.")
+    @unittest.skip(reason="worker 'gw4' crashed on CI, passing locally.")
     def test_subword_regularization_tokenizer(self):
         pass

-    @unittest.skip("Skipping")
+    @unittest.skip(reason="Skipping")
     def test_torch_encode_plus_sent_to_model(self):
         pass

@@ -227,7 +222,7 @@ class GemmaIntegrationTest(unittest.TestCase):
         self.tokenizer.add_eos_token = False
         self.rust_tokenizer.add_eos_token = False

-    @unittest.skip("Not super important and always failing. Let's skip it")
+    @unittest.skip(reason="Not super important and always failing. Let's skip it")
     @slow
     def test_conversion(self):
         # This is excruciatingly slow since it has to recreate the entire merge
@@ -167,9 +167,11 @@ class GitVisionModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -168,11 +168,11 @@ class GLPNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_depth_estimation(*config_and_inputs)

-    @unittest.skip("GLPN does not use inputs_embeds")
+    @unittest.skip(reason="GLPN does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass

-    @unittest.skip("GLPN does not have get_input_embeddings method and get_output_embeddings methods")
+    @unittest.skip(reason="GLPN does not have get_input_embeddings method and get_output_embeddings methods")
     def test_model_get_set_embeddings(self):
         pass

@@ -283,7 +283,7 @@ class GLPNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):

     def test_training(self):
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")

         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.return_dict = True
@@ -98,7 +98,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):

     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")

         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer(add_prefix_space=True)
@@ -126,6 +126,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         input_bpe_tokens = [14, 15, 10, 9, 3, 2, 15, 19]
         self.assertListEqual(rust_tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)

+    @unittest.skip
     def test_pretokenized_inputs(self, *args, **kwargs):
         # It's very difficult to mix/test pretokenization with byte-level
         # And get both GPT2 and Roberta to work at the same time (mostly an issue of adding a space before the string)
@@ -247,7 +248,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         self.assertTrue(decode_s.startswith(bos_token))
         self.assertTrue(all(d.startswith(bos_token) for d in decode_s2))

-    # tokenizer has no padding token
+    @unittest.skip(reason="tokenizer has no padding token")
     def test_padding_different_model_input_name(self):
         pass

@@ -331,7 +332,7 @@ class OPTTokenizationTest(unittest.TestCase):
         # Same as above
         self.assertEqual(tokens_ids, [2, 250, 1345, 9, 10, 4758])

-    @unittest.skip("This test is failing because of a bug in the fast tokenizer")
+    @unittest.skip(reason="This test is failing because of a bug in the fast tokenizer")
     def test_users_can_modify_bos(self):
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m", from_slow=True)

@@ -458,27 +458,27 @@ class GPTBigCodeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
     def test_config(self):
         self.config_tester.run_common_tests()

-    @unittest.skip("MQA models does not support retain_grad")
+    @unittest.skip(reason="MQA models does not support retain_grad")
     def test_retain_grad_hidden_states_attentions(self):
         pass

-    @unittest.skip("Contrastive search not supported due to non-standard caching mechanism")
+    @unittest.skip(reason="Contrastive search not supported due to non-standard caching mechanism")
     def test_contrastive_generate(self):
         pass

-    @unittest.skip("Contrastive search not supported due to non-standard caching mechanism")
+    @unittest.skip(reason="Contrastive search not supported due to non-standard caching mechanism")
     def test_contrastive_generate_dict_outputs_use_cache(self):
         pass

-    @unittest.skip("CPU offload seems to be broken for some reason - tiny models keep hitting corner cases")
+    @unittest.skip(reason="CPU offload seems to be broken for some reason - tiny models keep hitting corner cases")
     def test_cpu_offload(self):
         pass

-    @unittest.skip("Disk offload seems to be broken for some reason - tiny models keep hitting corner cases")
+    @unittest.skip(reason="Disk offload seems to be broken for some reason - tiny models keep hitting corner cases")
     def test_disk_offload(self):
         pass

-    @unittest.skip("BigCodeGPT has a non-standard KV cache format.")
+    @unittest.skip(reason="BigCodeGPT has a non-standard KV cache format.")
     def test_past_key_values_format(self):
         pass

@@ -128,10 +128,11 @@ class GPTNeoXJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         assert encoded_sentence == ids_1
         assert encoded_pair == ids_1 + ids_2

+    @unittest.skip
     def test_conversion_reversible(self):
         # Intentionally convert some words to accommodate character fluctuations unique to Japanese
         pass

+    @unittest.skip(reason="tokenizer has no padding token")
     def test_padding_different_model_input_name(self):
-        # tokenizer has no padding token
         pass
@@ -262,9 +262,11 @@ class GroupViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
             ],
         )

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -458,9 +460,11 @@ class GroupViTTextModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)

+    @unittest.skip
     def test_training(self):
         pass

+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass

@@ -618,7 +622,7 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase

     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -95,7 +95,7 @@ class HerbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):

     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")

         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -350,22 +350,21 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.check_labels_out_of_vocab(*config_and_inputs)

-    # Hubert has no inputs_embeds
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_inputs_embeds(self):
         pass

-    # `input_ids` is renamed to `input_values`
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_forward_signature(self):
         pass

     # Hubert cannot resize token embeddings
     # since it has no tokens embeddings
+    @unittest.skip(reason="Hubert has no tokens embeddings")
     def test_resize_tokens_embeddings(self):
         pass

-    # Hubert has no inputs_embeds
-    # and thus the `get_input_embeddings` fn
-    # is not implemented
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_model_get_set_embeddings(self):
         pass

@@ -438,10 +437,10 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     # Hubert cannot be TorchScripted because of torch.nn.utils.weight_norm
     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
         # TODO: fix it
-        self.skipTest("torch 2.1 breaks torch fx tests for wav2vec2/hubert.")
+        self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

         if not is_torch_fx_available() or not self.fx_compatible:
-            return
+            self.skipTest(reason="torch fx is not available or not compatible with this model")

         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -615,22 +614,19 @@ class HubertRobustModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.check_labels_out_of_vocab(*config_and_inputs)

-    # Hubert has no inputs_embeds
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_inputs_embeds(self):
         pass

-    # `input_ids` is renamed to `input_values`
+    @unittest.skip(reason="Hubert has input_values instead of input_ids")
     def test_forward_signature(self):
         pass

-    # Hubert cannot resize token embeddings
-    # since it has no tokens embeddings
+    @unittest.skip(reason="Hubert has no tokens embeddings")
     def test_resize_tokens_embeddings(self):
         pass

-    # Hubert has no inputs_embeds
-    # and thus the `get_input_embeddings` fn
-    # is not implemented
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_model_get_set_embeddings(self):
         pass

@@ -379,7 +379,7 @@ class IBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         with torch.no_grad():
             model(**inputs)[0]

-    @unittest.skip("ibert overrides scaling to None if inputs_embeds")
+    @unittest.skip(reason="ibert overrides scaling to None if inputs_embeds")
     def test_inputs_embeds_matches_input_ids(self):
         pass

@@ -191,18 +191,18 @@ class IdeficsImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):

         torch.testing.assert_close(pixel_values_transform_implied, pixel_values_transform_supplied, rtol=0.0, atol=0.0)

-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_numpy(self):
         pass

-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_numpy_4_channels(self):
         pass

-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_pil(self):
         pass

-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_pytorch(self):
         pass
@@ -316,7 +316,7 @@ class IdeficsModelTester:
     @slow
     @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
     def test_eager_matches_sdpa_inference(self, torch_dtype: str):
-        self.skipTest("Idefics has a hard requirement on SDPA, skipping this test")
+        self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")


 @unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required")
@@ -422,13 +422,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)

     def test_training(self):
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")

         for model_class in self.all_model_classes:
             # IdeficsModel does not support training, users should use
             # IdeficsForVisionText2Text for this purpose
             if model_class == IdeficsModel:
-                return
+                self.skipTest(reason="IdeficsModel does not support training")

             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             config.return_dict = True
@@ -442,13 +442,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)

     def test_training_gradient_checkpointing(self):
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")

         for model_class in self.all_model_classes:
             # IdeficsModel does not support training, users should use
             # IdeficsForVisionText2Text for this purpose
             if model_class == IdeficsModel:
-                return
+                self.skipTest(reason="IdeficsModel does not support training")

             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             config.use_cache = False
@@ -575,7 +575,7 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     @slow
     @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
     def test_eager_matches_sdpa_inference(self, torch_dtype: str):
-        self.skipTest("Idefics has a hard requirement on SDPA, skipping this test")
+        self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")


 @unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required")
@@ -590,11 +590,11 @@ class IdeficsForVisionText2TextTest(IdeficsModelTest, unittest.TestCase):
         )
         self.config_tester = ConfigTester(self, config_class=IdeficsConfig, hidden_size=37)

-    @unittest.skip("We only test the model that takes in multiple images")
+    @unittest.skip(reason="We only test the model that takes in multiple images")
     def test_model(self):
         pass

-    @unittest.skip("We only test the model that takes in multiple images")
+    @unittest.skip(reason="We only test the model that takes in multiple images")
     def test_for_token_classification(self):
         pass

@@ -176,19 +176,19 @@ class Idefics2ModelTest(ModelTesterMixin, unittest.TestCase):
         self.model_tester = Idefics2VisionText2TextModelTester(self)
         self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)

-    @unittest.skip("input_embeds cannot be passed in without input_ids")
+    @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
     def test_inputs_embeds():
         pass

-    @unittest.skip("input_embeds cannot be passed in without input_ids")
+    @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
     def test_inputs_embeds_matches_input_ids(self):
         pass

-    @unittest.skip("Model does not support padding right")
+    @unittest.skip(reason="Model does not support padding right")
|
||||||
def test_flash_attn_2_generate_padding_right(self):
|
def test_flash_attn_2_generate_padding_right(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("Model does not support padding right")
|
@unittest.skip(reason="Model does not support padding right")
|
||||||
def test_flash_attn_2_inference_padding_right(self):
|
def test_flash_attn_2_inference_padding_right(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -336,15 +336,15 @@ class Idefics2ForConditionalGenerationModelTest(GenerationTesterMixin, ModelTest
|
|||||||
self.model_tester = Idefics2VisionText2TextModelTester(self)
|
self.model_tester = Idefics2VisionText2TextModelTester(self)
|
||||||
self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
|
self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
|
||||||
|
|
||||||
@unittest.skip("input_embeds cannot be passed in without input_ids")
|
@unittest.skip(reason="input_embeds cannot be passed in without input_ids")
|
||||||
def test_inputs_embeds():
|
def test_inputs_embeds():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("Model does not support padding right")
|
@unittest.skip(reason="Model does not support padding right")
|
||||||
def test_flash_attn_2_generate_padding_right(self):
|
def test_flash_attn_2_generate_padding_right(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("Model does not support padding right")
|
@unittest.skip(reason="Model does not support padding right")
|
||||||
def test_flash_attn_2_inference_padding_right(self):
|
def test_flash_attn_2_inference_padding_right(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
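A recurring change in the hunks above and below is that a bare `return` inside a guard clause becomes `self.skipTest(reason=...)`, so a test that cannot run is reported as skipped instead of silently counting as passed. A minimal sketch of the pattern, with a hypothetical `test_resize_embeddings` flag standing in for the mixin attributes read by the real suites:

```python
import unittest


class ExampleModelTest(unittest.TestCase):
    # Hypothetical flag; the real suites read attributes such as
    # test_resize_embeddings or test_torchscript from their tester mixins.
    test_resize_embeddings = False

    def test_resize_embeddings_guard(self):
        if not self.test_resize_embeddings:
            # A bare `return` here would make the test count as passed;
            # skipTest marks it as skipped and records the reason.
            self.skipTest(reason="test_resize_embeddings is set to False")
        # the actual resize checks would follow here
```
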
@ -176,7 +176,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
|||||||
else:
|
else:
|
||||||
self.assertEqual(image_processor_first[key], value)
|
self.assertEqual(image_processor_first[key], value)
|
||||||
|
|
||||||
@unittest.skip("ImageGPT requires clusters at initialization")
|
@unittest.skip(reason="ImageGPT requires clusters at initialization")
|
||||||
def test_init_without_params(self):
|
def test_init_without_params(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -220,7 +220,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
|||||||
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
|
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.skip("ImageGPT assumes clusters for 3 channels")
|
@unittest.skip(reason="ImageGPT assumes clusters for 3 channels")
|
||||||
def test_call_numpy_4_channels(self):
|
def test_call_numpy_4_channels(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -357,7 +357,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
|
|||||||
inputs_dict,
|
inputs_dict,
|
||||||
) = self.model_tester.prepare_config_and_inputs_for_common()
|
) = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
if not self.test_resize_embeddings:
|
if not self.test_resize_embeddings:
|
||||||
return
|
self.skipTest(reason="test_resize_embeddings is set to False")
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
config = copy.deepcopy(original_config)
|
config = copy.deepcopy(original_config)
|
||||||
@ -404,13 +404,13 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
|
|||||||
inputs_dict,
|
inputs_dict,
|
||||||
) = self.model_tester.prepare_config_and_inputs_for_common()
|
) = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
if not self.test_resize_embeddings:
|
if not self.test_resize_embeddings:
|
||||||
return
|
self.skipTest(reason="test_resize_embeddings is set to False")
|
||||||
|
|
||||||
original_config.tie_word_embeddings = False
|
original_config.tie_word_embeddings = False
|
||||||
|
|
||||||
# if model cannot untie embeddings -> leave test
|
# if model cannot untie embeddings -> leave test
|
||||||
if original_config.tie_word_embeddings:
|
if original_config.tie_word_embeddings:
|
||||||
return
|
self.skipTest(reason="tie_word_embeddings is set to False")
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
config = copy.deepcopy(original_config)
|
config = copy.deepcopy(original_config)
|
||||||
@ -493,7 +493,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
|
|||||||
|
|
||||||
def _create_and_check_torchscript(self, config, inputs_dict):
|
def _create_and_check_torchscript(self, config, inputs_dict):
|
||||||
if not self.test_torchscript:
|
if not self.test_torchscript:
|
||||||
return
|
self.skipTest(reason="test_torchscript is set to False")
|
||||||
|
|
||||||
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
||||||
configs_no_init.torchscript = True
|
configs_no_init.torchscript = True
|
||||||
@ -573,7 +573,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
|
|||||||
|
|
||||||
self.assertTrue(models_equal)
|
self.assertTrue(models_equal)
|
||||||
|
|
||||||
@unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :)
|
@unittest.skip(reason="The model doesn't support left padding") # and it's not used enough to be worth fixing :)
|
||||||
def test_left_padding_compatibility(self):
|
def test_left_padding_compatibility(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
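Most decorator-level edits in this commit are mechanical: the positional message given to `@unittest.skip` becomes an explicit `reason=` keyword, and conditional skips such as the `is_torch_greater_or_equal_than_2_0` check keep the same shape through `unittest.skipIf`. A small sketch under that assumption, with a made-up `_has_required_backend` flag:

```python
import unittest

# Made-up capability flag; the real tests check things such as
# is_torch_greater_or_equal_than_2_0 or backend availability.
_has_required_backend = False


@unittest.skipIf(not _has_required_backend, reason="pytorch 2.0 or higher is required")
class ExampleBackendTest(unittest.TestCase):
    @unittest.skip(reason="not supported")
    def test_call_numpy(self):
        pass
```
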
@ -278,17 +278,19 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
|||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
check_hidden_states_output(inputs_dict, config, model_class)
|
||||||
|
|
||||||
# Ignore since we have no token embeddings
|
@unittest.skip(reason="Informer does not have token embeddings")
|
||||||
def test_resize_tokens_embeddings(self):
|
def test_resize_tokens_embeddings(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@unittest.skip
|
||||||
def test_model_outputs_equivalence(self):
|
def test_model_outputs_equivalence(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@unittest.skip
|
||||||
def test_determinism(self):
|
def test_determinism(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("randomly selects U keys while calculating attentions")
|
@unittest.skip(reason="randomly selects U keys while calculating attentions")
|
||||||
def test_batching_equivalence(self):
|
def test_batching_equivalence(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -390,7 +390,7 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
Overriding the test_mismatched_shapes_have_properly_initialized_weights test because A_log and D params of the
|
Overriding the test_mismatched_shapes_have_properly_initialized_weights test because A_log and D params of the
|
||||||
Mamba block are initialized differently and we tested that in test_initialization
|
Mamba block are initialized differently and we tested that in test_initialization
|
||||||
"""
|
"""
|
||||||
self.skipTest("Cumbersome and redundant for Jamba")
|
self.skipTest(reason="Cumbersome and redundant for Jamba")
|
||||||
|
|
||||||
def test_attention_outputs(self):
|
def test_attention_outputs(self):
|
||||||
r"""
|
r"""
|
||||||
@ -638,9 +638,9 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
Overriding the test_flash_attn_2_inference_padding_right test as the Jamba model, like Mixtral, doesn't support
|
Overriding the test_flash_attn_2_inference_padding_right test as the Jamba model, like Mixtral, doesn't support
|
||||||
right padding + use cache with FA2
|
right padding + use cache with FA2
|
||||||
"""
|
"""
|
||||||
self.skipTest("Jamba flash attention does not support right padding")
|
self.skipTest(reason="Jamba flash attention does not support right padding")
|
||||||
|
|
||||||
@unittest.skip("Jamba has its own special cache type")
|
@unittest.skip(reason="Jamba has its own special cache type")
|
||||||
@parameterized.expand([(1, False), (1, True), (4, False)])
|
@parameterized.expand([(1, False), (1, True), (4, False)])
|
||||||
def test_new_cache_format(self, num_beams, do_sample):
|
def test_new_cache_format(self, num_beams, do_sample):
|
||||||
pass
|
pass
|
||||||
|
@ -378,11 +378,11 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
|||||||
result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
|
result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
|
||||||
self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
|
self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
|
||||||
|
|
||||||
@unittest.skip("JetMoe buffers include complex numbers, which breaks this test")
|
@unittest.skip(reason="JetMoe buffers include complex numbers, which breaks this test")
|
||||||
def test_save_load_fast_init_from_base(self):
|
def test_save_load_fast_init_from_base(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("JetMoe uses MoA on all models so the KV cache is a non standard format")
|
@unittest.skip(reason="JetMoe uses MoA on all models so the KV cache is a non standard format")
|
||||||
def test_past_key_values_format(self):
|
def test_past_key_values_format(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -470,7 +470,7 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
|||||||
@pytest.mark.flash_attn_test
|
@pytest.mark.flash_attn_test
|
||||||
@slow
|
@slow
|
||||||
def test_flash_attn_2_inference_equivalence_right_padding(self):
|
def test_flash_attn_2_inference_equivalence_right_padding(self):
|
||||||
self.skipTest("JetMoe flash attention does not support right padding")
|
self.skipTest(reason="JetMoe flash attention does not support right padding")
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
|
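Several overridden tests above (the eager-vs-SDPA comparison for Idefics, the right-padding flash-attention tests for Jamba and JetMoe) skip from inside the test body, because the decision depends on the model rather than the environment. A hedged sketch using the third-party `parameterized` helper these suites already rely on, with a hypothetical `_supports_eager_attention` flag:

```python
import unittest

from parameterized import parameterized  # third-party helper already used by the test suite


class ExampleAttentionTest(unittest.TestCase):
    # Hypothetical trait; the real tests key off model properties such as a
    # hard SDPA requirement or missing right-padding support.
    _supports_eager_attention = False

    @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
    def test_eager_matches_sdpa_inference(self, torch_dtype: str):
        if not self._supports_eager_attention:
            self.skipTest(reason="model has a hard requirement on SDPA")
        # the eager vs. SDPA output comparison for `torch_dtype` would go here
```
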
@ -375,7 +375,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
|||||||
# overwrite from common in order to use `config.text_config.vocab_size` instead of `config.vocab_size`
|
# overwrite from common in order to use `config.text_config.vocab_size` instead of `config.vocab_size`
|
||||||
def test_tie_model_weights(self):
|
def test_tie_model_weights(self):
|
||||||
if not self.test_torchscript:
|
if not self.test_torchscript:
|
||||||
return
|
self.skipTest(reason="test_torchscript is set to False")
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
@ -429,7 +429,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
|||||||
|
|
||||||
def _create_and_check_torchscript(self, config, inputs_dict):
|
def _create_and_check_torchscript(self, config, inputs_dict):
|
||||||
if not self.test_torchscript:
|
if not self.test_torchscript:
|
||||||
return
|
self.skipTest(reason="test_torchscript is set to False")
|
||||||
|
|
||||||
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
||||||
configs_no_init.torchscript = True
|
configs_no_init.torchscript = True
|
||||||
|
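The tokenizer suites below raise their skips inside `subTest` loops, so each tokenizer that cannot be exercised is reported as skipped with a reason. A short sketch, with stub tokenizers standing in for `self.tokenizers_list`:

```python
import unittest
from types import SimpleNamespace


class ExampleTokenizerTest(unittest.TestCase):
    # Stub objects standing in for the real entries of self.tokenizers_list.
    tokenizers = [SimpleNamespace(pad_token=None), SimpleNamespace(pad_token="[PAD]")]

    def test_padding(self):
        for tokenizer in self.tokenizers:
            with self.subTest(f"{tokenizer.__class__.__name__}"):
                if tokenizer.pad_token is None:
                    # Recorded as a skipped subtest; the loop keeps going.
                    self.skipTest(reason="No padding token.")
                self.assertEqual(tokenizer.pad_token, "[PAD]")
```
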
@ -69,6 +69,7 @@ class LayoutLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
|
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
|
||||||
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
|
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
|
||||||
|
|
||||||
|
@unittest.skip
|
||||||
def test_special_tokens_as_you_expect(self):
|
def test_special_tokens_as_you_expect(self):
|
||||||
"""If you are training a seq2seq model that expects a decoder_prefix token make sure it is prepended to decoder_input_ids"""
|
"""If you are training a seq2seq model that expects a decoder_prefix token make sure it is prepended to decoder_input_ids"""
|
||||||
pass
|
pass
|
||||||
|
@ -96,7 +96,7 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
|
|||||||
image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
|
image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
|
||||||
self.assertEqual(image_processor.size, {"height": 42, "width": 42})
|
self.assertEqual(image_processor.size, {"height": 42, "width": 42})
|
||||||
|
|
||||||
@unittest.skip("Tesseract version is not correct in ci. @Arthur FIXME")
|
@unittest.skip(reason="Tesseract version is not correct in ci. @Arthur FIXME")
|
||||||
def test_layoutlmv2_integration_test(self):
|
def test_layoutlmv2_integration_test(self):
|
||||||
# with apply_OCR = True
|
# with apply_OCR = True
|
||||||
image_processing = LayoutLMv2ImageProcessor()
|
image_processing = LayoutLMv2ImageProcessor()
|
||||||
|
@ -414,7 +414,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
|
|||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
check_hidden_states_output(inputs_dict, config, model_class)
|
||||||
|
|
||||||
@unittest.skip("We cannot configure detectron2 to output a smaller backbone")
|
@unittest.skip(reason="We cannot configure detectron2 to output a smaller backbone")
|
||||||
def test_model_is_small(self):
|
def test_model_is_small(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -195,7 +195,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"]
|
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"]
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.skip("Chat template tests don't play well with table/layout models.")
|
@unittest.skip(reason="Chat template tests don't play well with table/layout models.")
|
||||||
def test_chat_template_batched(self):
|
def test_chat_template_batched(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -385,11 +385,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
|
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
|
||||||
self.assertIn(decoded, [output, output.lower()])
|
self.assertIn(decoded, [output, output.lower()])
|
||||||
|
|
||||||
@unittest.skip("Not implemented")
|
@unittest.skip(reason="Not implemented")
|
||||||
def test_right_and_left_truncation(self):
|
def test_right_and_left_truncation(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("Not implemented")
|
@unittest.skip(reason="Not implemented")
|
||||||
def test_split_special_tokens(self):
|
def test_split_special_tokens(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -814,7 +814,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_padding_warning_message_fast_tokenizer(self):
|
def test_padding_warning_message_fast_tokenizer(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
words, boxes = self.get_words_and_boxes_batch()
|
words, boxes = self.get_words_and_boxes_batch()
|
||||||
|
|
||||||
@ -835,7 +835,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer_slow = self.get_tokenizer()
|
tokenizer_slow = self.get_tokenizer()
|
||||||
|
|
||||||
@ -942,7 +942,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
encoded_sequences_batch_padded_2[key],
|
encoded_sequences_batch_padded_2[key],
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.skip("batch_encode_plus does not handle overflowing tokens.")
|
@unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
|
||||||
def test_batch_encode_plus_overflowing_tokens(self):
|
def test_batch_encode_plus_overflowing_tokens(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -1003,7 +1003,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.pad_token is None:
|
if tokenizer.pad_token is None:
|
||||||
self.skipTest("No padding token.")
|
self.skipTest(reason="No padding token.")
|
||||||
else:
|
else:
|
||||||
words, boxes = self.get_words_and_boxes()
|
words, boxes = self.get_words_and_boxes()
|
||||||
|
|
||||||
@ -1046,7 +1046,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_build_inputs_with_special_tokens(self):
|
def test_build_inputs_with_special_tokens(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1290,13 +1290,13 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
||||||
return
|
self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
|
||||||
|
|
||||||
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
||||||
config = config_class()
|
config = config_class()
|
||||||
|
|
||||||
if config.is_encoder_decoder or config.pad_token_id is None:
|
if config.is_encoder_decoder or config.pad_token_id is None:
|
||||||
return
|
self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
|
||||||
|
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
|
|
||||||
@ -1327,11 +1327,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_rust_and_python_full_tokenizers(self):
|
def test_rust_and_python_full_tokenizers(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_tokenizer()
|
tokenizer = self.get_tokenizer()
|
||||||
rust_tokenizer = self.get_rust_tokenizer()
|
rust_tokenizer = self.get_rust_tokenizer()
|
||||||
@ -1349,7 +1349,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_tokenization_python_rust_equals(self):
|
def test_tokenization_python_rust_equals(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1403,7 +1403,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_embeded_special_tokens(self):
|
def test_embeded_special_tokens(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1593,7 +1593,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_training_new_tokenizer(self):
|
def test_training_new_tokenizer(self):
|
||||||
# This feature only exists for fast tokenizers
|
# This feature only exists for fast tokenizers
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
|
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
|
||||||
@ -1630,7 +1630,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_training_new_tokenizer_with_special_tokens_change(self):
|
def test_training_new_tokenizer_with_special_tokens_change(self):
|
||||||
# This feature only exists for fast tokenizers
|
# This feature only exists for fast tokenizers
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
# Test with a special tokens map
|
# Test with a special tokens map
|
||||||
@ -1743,7 +1743,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_padding_different_model_input_name(self):
|
def test_padding_different_model_input_name(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1837,7 +1837,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertEqual(len(tokens[key].shape), 3)
|
self.assertEqual(len(tokens[key].shape), 3)
|
||||||
self.assertEqual(tokens[key].shape[-1], 4)
|
self.assertEqual(tokens[key].shape[-1], 4)
|
||||||
|
|
||||||
@unittest.skip("TO DO: overwrite this very extensive test.")
|
@unittest.skip(reason="TO DO: overwrite this very extensive test.")
|
||||||
def test_alignement_methods(self):
|
def test_alignement_methods(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -1875,7 +1875,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
return words, boxes, output_ids
|
return words, boxes, output_ids
|
||||||
|
|
||||||
# @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.")
|
# @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
|
||||||
def test_maximum_encoding_length_pair_input(self):
|
def test_maximum_encoding_length_pair_input(self):
|
||||||
tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
|
tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
|
||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
@ -2237,7 +2237,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertEqual(bbox, bbox_second_sequence)
|
self.assertEqual(bbox, bbox_second_sequence)
|
||||||
self.assertEqual(overflowing_bbox, overflowing_token_bbox_second_sequence_slow)
|
self.assertEqual(overflowing_bbox, overflowing_token_bbox_second_sequence_slow)
|
||||||
|
|
||||||
# @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.")
|
# @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
|
||||||
def test_maximum_encoding_length_single_input(self):
|
def test_maximum_encoding_length_single_input(self):
|
||||||
tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
|
tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
|
||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
@ -2359,15 +2359,15 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertEqual(bbox, sequence["bbox"][:-2])
|
self.assertEqual(bbox, sequence["bbox"][:-2])
|
||||||
self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
|
self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
|
||||||
|
|
||||||
@unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.")
|
@unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
|
||||||
def test_pretokenized_inputs(self):
|
def test_pretokenized_inputs(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("LayoutLMv2 tokenizer always expects pretokenized inputs.")
|
@unittest.skip(reason="LayoutLMv2 tokenizer always expects pretokenized inputs.")
|
||||||
def test_compare_pretokenized_inputs(self):
|
def test_compare_pretokenized_inputs(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("LayoutLMv2 fast tokenizer does not support prepare_for_model")
|
@unittest.skip(reason="LayoutLMv2 fast tokenizer does not support prepare_for_model")
|
||||||
def test_compare_prepare_for_model(self):
|
def test_compare_prepare_for_model(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -2476,10 +2476,10 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertDictEqual(dict(encoding_p), expected_results)
|
self.assertDictEqual(dict(encoding_p), expected_results)
|
||||||
self.assertDictEqual(dict(encoding_r), expected_results)
|
self.assertDictEqual(dict(encoding_r), expected_results)
|
||||||
|
|
||||||
@unittest.skip("Doesn't support another framework than PyTorch")
|
@unittest.skip(reason="Doesn't support another framework than PyTorch")
|
||||||
def test_np_encode_plus_sent_to_model(self):
|
def test_np_encode_plus_sent_to_model(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("Chat is not supported")
|
@unittest.skip(reason="Chat is not supported")
|
||||||
def test_chat_template(self):
|
def test_chat_template(self):
|
||||||
pass
|
pass
|
||||||
|
@ -140,7 +140,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
output_text = "lower newer"
|
output_text = "lower newer"
|
||||||
return input_text, output_text
|
return input_text, output_text
|
||||||
|
|
||||||
@unittest.skip("Chat template tests don't play well with table/layout models.")
|
@unittest.skip(reason="Chat template tests don't play well with table/layout models.")
|
||||||
def test_chat_template_batched(self):
|
def test_chat_template_batched(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -265,11 +265,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
|
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
|
||||||
self.assertIn(decoded, [output, output.lower()])
|
self.assertIn(decoded, [output, output.lower()])
|
||||||
|
|
||||||
@unittest.skip("Not implemented")
|
@unittest.skip(reason="Not implemented")
|
||||||
def test_right_and_left_truncation(self):
|
def test_right_and_left_truncation(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("Not implemented")
|
@unittest.skip(reason="Not implemented")
|
||||||
def test_split_special_tokens(self):
|
def test_split_special_tokens(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -694,7 +694,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_padding_warning_message_fast_tokenizer(self):
|
def test_padding_warning_message_fast_tokenizer(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
words, boxes = self.get_words_and_boxes_batch()
|
words, boxes = self.get_words_and_boxes_batch()
|
||||||
|
|
||||||
@ -715,7 +715,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer_slow = self.get_tokenizer()
|
tokenizer_slow = self.get_tokenizer()
|
||||||
|
|
||||||
@ -822,7 +822,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
encoded_sequences_batch_padded_2[key],
|
encoded_sequences_batch_padded_2[key],
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.skip("batch_encode_plus does not handle overflowing tokens.")
|
@unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
|
||||||
def test_batch_encode_plus_overflowing_tokens(self):
|
def test_batch_encode_plus_overflowing_tokens(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -883,7 +883,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.pad_token is None:
|
if tokenizer.pad_token is None:
|
||||||
self.skipTest("No padding token.")
|
self.skipTest(reason="No padding token.")
|
||||||
else:
|
else:
|
||||||
words, boxes = self.get_words_and_boxes()
|
words, boxes = self.get_words_and_boxes()
|
||||||
|
|
||||||
@ -926,7 +926,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_build_inputs_with_special_tokens(self):
|
def test_build_inputs_with_special_tokens(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1168,13 +1168,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
||||||
return
|
self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
|
||||||
|
|
||||||
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
||||||
config = config_class()
|
config = config_class()
|
||||||
|
|
||||||
if config.is_encoder_decoder or config.pad_token_id is None:
|
if config.is_encoder_decoder or config.pad_token_id is None:
|
||||||
return
|
self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
|
||||||
|
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
|
|
||||||
@ -1205,11 +1205,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_rust_and_python_full_tokenizers(self):
|
def test_rust_and_python_full_tokenizers(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_tokenizer()
|
tokenizer = self.get_tokenizer()
|
||||||
rust_tokenizer = self.get_rust_tokenizer()
|
rust_tokenizer = self.get_rust_tokenizer()
|
||||||
@ -1227,7 +1227,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_tokenization_python_rust_equals(self):
|
def test_tokenization_python_rust_equals(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1281,7 +1281,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_embeded_special_tokens(self):
|
def test_embeded_special_tokens(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1471,7 +1471,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_training_new_tokenizer(self):
|
def test_training_new_tokenizer(self):
|
||||||
# This feature only exists for fast tokenizers
|
# This feature only exists for fast tokenizers
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
|
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
|
||||||
@ -1508,7 +1508,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_training_new_tokenizer_with_special_tokens_change(self):
|
def test_training_new_tokenizer_with_special_tokens_change(self):
|
||||||
# This feature only exists for fast tokenizers
|
# This feature only exists for fast tokenizers
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
# Test with a special tokens map
|
# Test with a special tokens map
|
||||||
@ -1621,7 +1621,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_padding_different_model_input_name(self):
|
def test_padding_different_model_input_name(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1720,7 +1720,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertEqual(len(tokens[key].shape), 3)
|
self.assertEqual(len(tokens[key].shape), 3)
|
||||||
self.assertEqual(tokens[key].shape[-1], 4)
|
self.assertEqual(tokens[key].shape[-1], 4)
|
||||||
|
|
||||||
@unittest.skip("TO DO: overwrite this very extensive test.")
|
@unittest.skip(reason="TO DO: overwrite this very extensive test.")
|
||||||
def test_alignement_methods(self):
|
def test_alignement_methods(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -2272,15 +2272,15 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
# self.assertEqual(bbox, sequence["bbox"][:-2])
|
# self.assertEqual(bbox, sequence["bbox"][:-2])
|
||||||
# self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
|
# self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
|
||||||
|
|
||||||
@unittest.skip("LayoutLMv3 tokenizer requires boxes besides sequences.")
|
@unittest.skip(reason="LayoutLMv3 tokenizer requires boxes besides sequences.")
|
||||||
def test_pretokenized_inputs(self):
|
def test_pretokenized_inputs(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("LayoutLMv3 tokenizer always expects pretokenized inputs.")
|
@unittest.skip(reason="LayoutLMv3 tokenizer always expects pretokenized inputs.")
|
||||||
def test_compare_pretokenized_inputs(self):
|
def test_compare_pretokenized_inputs(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@unittest.skip("LayoutLMv3 fast tokenizer does not support prepare_for_model")
|
@unittest.skip(reason="LayoutLMv3 fast tokenizer does not support prepare_for_model")
|
||||||
def test_compare_prepare_for_model(self):
|
def test_compare_prepare_for_model(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -2393,7 +2393,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
self.assertDictEqual(dict(encoding_p), expected_results)
|
self.assertDictEqual(dict(encoding_p), expected_results)
|
||||||
self.assertDictEqual(dict(encoding_r), expected_results)
|
self.assertDictEqual(dict(encoding_r), expected_results)
|
||||||
|
|
||||||
@unittest.skip("Doesn't support another framework than PyTorch")
|
@unittest.skip(reason="Doesn't support another framework than PyTorch")
|
||||||
def test_np_encode_plus_sent_to_model(self):
|
def test_np_encode_plus_sent_to_model(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -2408,13 +2408,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
||||||
return
|
self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
|
||||||
|
|
||||||
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
||||||
config = config_class()
|
config = config_class()
|
||||||
|
|
||||||
if config.is_encoder_decoder or config.pad_token_id is None:
|
if config.is_encoder_decoder or config.pad_token_id is None:
|
||||||
return
|
self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
|
||||||
|
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
|
|
||||||
@ -2433,6 +2433,6 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
model(encoded_sequence)
|
model(encoded_sequence)
|
||||||
model(batch_encoded_sequence)
|
model(batch_encoded_sequence)
|
||||||
|
|
||||||
@unittest.skip("Chat is not supported")
|
@unittest.skip(reason="Chat is not supported")
|
||||||
def test_chat_template(self):
|
def test_chat_template(self):
|
||||||
pass
|
pass
|
||||||
|
@ -107,7 +107,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
output_text = "unwanted, running"
|
output_text = "unwanted, running"
|
||||||
return input_text, output_text
|
return input_text, output_text
|
||||||
|
|
||||||
@unittest.skip("Chat template tests don't play well with table/layout models.")
|
@unittest.skip(reason="Chat template tests don't play well with table/layout models.")
|
||||||
def test_chat_template_batched(self):
|
def test_chat_template_batched(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -115,7 +115,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
# this tokenizer
|
# this tokenizer
|
||||||
def test_save_sentencepiece_tokenizer(self) -> None:
|
def test_save_sentencepiece_tokenizer(self) -> None:
|
||||||
if not self.test_sentencepiece or not self.test_slow_tokenizer:
|
if not self.test_sentencepiece or not self.test_slow_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_sentencepiece or test_slow_tokenizer is set to False")
|
||||||
# We want to verify that we will be able to save the tokenizer even if the original files that were used to
|
# We want to verify that we will be able to save the tokenizer even if the original files that were used to
|
||||||
# build the tokenizer have been deleted in the meantime.
|
# build the tokenizer have been deleted in the meantime.
|
||||||
words, boxes = self.get_words_and_boxes()
|
words, boxes = self.get_words_and_boxes()
|
||||||
@ -745,7 +745,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_padding_warning_message_fast_tokenizer(self):
|
def test_padding_warning_message_fast_tokenizer(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
words, boxes = self.get_words_and_boxes_batch()
|
words, boxes = self.get_words_and_boxes_batch()
|
||||||
|
|
||||||
@ -766,7 +766,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer_slow = self.get_tokenizer()
|
tokenizer_slow = self.get_tokenizer()
|
||||||
|
|
||||||
@ -873,7 +873,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
encoded_sequences_batch_padded_2[key],
|
encoded_sequences_batch_padded_2[key],
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.skip("batch_encode_plus does not handle overflowing tokens.")
|
@unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
|
||||||
def test_batch_encode_plus_overflowing_tokens(self):
|
def test_batch_encode_plus_overflowing_tokens(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -934,7 +934,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.pad_token is None:
|
if tokenizer.pad_token is None:
|
||||||
self.skipTest("No padding token.")
|
self.skipTest(reason="No padding token.")
|
||||||
else:
|
else:
|
||||||
words, boxes = self.get_words_and_boxes()
|
words, boxes = self.get_words_and_boxes()
|
||||||
|
|
||||||
@ -977,7 +977,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_build_inputs_with_special_tokens(self):
|
def test_build_inputs_with_special_tokens(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1066,7 +1066,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
shutil.rmtree(tmpdirname)
|
shutil.rmtree(tmpdirname)
|
||||||
|
|
||||||
@unittest.skip("Not implemented")
|
@unittest.skip(reason="Not implemented")
|
||||||
def test_right_and_left_truncation(self):
|
def test_right_and_left_truncation(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -1224,13 +1224,13 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
for tokenizer in tokenizers:
|
for tokenizer in tokenizers:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
with self.subTest(f"{tokenizer.__class__.__name__}"):
|
||||||
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
|
||||||
return
|
self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
|
||||||
|
|
||||||
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
|
||||||
config = config_class()
|
config = config_class()
|
||||||
|
|
||||||
if config.is_encoder_decoder or config.pad_token_id is None:
|
if config.is_encoder_decoder or config.pad_token_id is None:
|
||||||
return
|
self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
|
||||||
|
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
|
|
||||||
@ -1256,11 +1256,11 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def test_rust_and_python_full_tokenizers(self):
|
def test_rust_and_python_full_tokenizers(self):
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_tokenizer()
|
tokenizer = self.get_tokenizer()
|
||||||
rust_tokenizer = self.get_rust_tokenizer()
|
rust_tokenizer = self.get_rust_tokenizer()
|
||||||
@ -1278,7 +1278,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_tokenization_python_rust_equals(self):
|
def test_tokenization_python_rust_equals(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1332,7 +1332,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_embeded_special_tokens(self):
|
def test_embeded_special_tokens(self):
|
||||||
if not self.test_slow_tokenizer:
|
if not self.test_slow_tokenizer:
|
||||||
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
# as we don't have a slow version, we can't compare the outputs between slow and fast versions
|
||||||
return
|
self.skipTest(reason="test_slow_tokenizer is set to False")
|
||||||
|
|
||||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||||
@ -1522,7 +1522,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_training_new_tokenizer(self):
|
def test_training_new_tokenizer(self):
|
||||||
# This feature only exists for fast tokenizers
|
# This feature only exists for fast tokenizers
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
|
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
|
||||||
@ -1559,7 +1559,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def test_training_new_tokenizer_with_special_tokens_change(self):
|
def test_training_new_tokenizer_with_special_tokens_change(self):
|
||||||
# This feature only exists for fast tokenizers
|
# This feature only exists for fast tokenizers
|
||||||
if not self.test_rust_tokenizer:
|
if not self.test_rust_tokenizer:
|
||||||
return
|
self.skipTest(reason="test_rust_tokenizer is set to False")
|
||||||
|
|
||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
# Test with a special tokens map
|
# Test with a special tokens map
|
||||||
@@ -1672,7 +1672,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_different_model_input_name(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1770,7 +1770,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_save_pretrained(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         self.tokenizers_list[0] = (self.rust_tokenizer_class, "hf-internal-testing/tiny-random-layoutxlm", {})
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
@@ -1838,27 +1838,27 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 
         shutil.rmtree(tmpdirname2)
 
-    @unittest.skip("TO DO: overwrite this very extensive test.")
+    @unittest.skip(reason="TO DO: overwrite this very extensive test.")
     def test_alignement_methods(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_pair_input(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_single_input(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
     def test_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer always expects pretokenized inputs.")
+    @unittest.skip(reason="layoutxlm tokenizer always expects pretokenized inputs.")
     def test_compare_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("layoutxlm fast tokenizer does not support prepare_for_model")
+    @unittest.skip(reason="layoutxlm fast tokenizer does not support prepare_for_model")
     def test_compare_prepare_for_model(self):
         pass
 
@@ -1962,18 +1962,18 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         self.assertDictEqual(dict(encoding_p), expected_results)
         self.assertDictEqual(dict(encoding_r), expected_results)
 
-    @unittest.skip("Doesn't support another framework than PyTorch")
+    @unittest.skip(reason="Doesn't support another framework than PyTorch")
     def test_np_encode_plus_sent_to_model(self):
         pass
 
-    @unittest.skip("Doesn't use SentencePiece")
+    @unittest.skip(reason="Doesn't use SentencePiece")
     def test_sentencepiece_tokenize_and_convert_tokens_to_string(self):
         pass
 
-    @unittest.skip("Doesn't use SentencePiece")
+    @unittest.skip(reason="Doesn't use SentencePiece")
     def test_sentencepiece_tokenize_and_decode(self):
         pass
 
-    @unittest.skip("Chat is not supported")
+    @unittest.skip(reason="Chat is not supported")
     def test_chat_template(self):
         pass
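The hunks above all apply the same pattern: a silent `return` inside a test body becomes an explicit `self.skipTest(reason=...)`, and bare `@unittest.skip("...")` decorators gain an explicit `reason=` keyword. The sketch below is a minimal, hypothetical example (the class and flag names are illustrative, not taken from this diff) of why the `skipTest` change matters: with a bare `return` the test is reported as passed even though nothing was verified, while `skipTest` makes the outcome show up as a skip together with its reason.

```python
# Minimal sketch, assuming a plain unittest.TestCase; the flag and test names are
# hypothetical and only stand in for the tester-mixin attributes used above.
import unittest


class TokenizerSkipExample(unittest.TestCase):
    test_slow_tokenizer = False  # assumed flag, mirrors the mixin attribute checked above

    def test_silently_passes(self):
        if not self.test_slow_tokenizer:
            return  # counted as a PASS; the missing prerequisite is invisible in reports

    def test_properly_skipped(self):
        if not self.test_slow_tokenizer:
            self.skipTest(reason="test_slow_tokenizer is set to False")  # counted as SKIPPED


if __name__ == "__main__":
    unittest.main(verbosity=2)
```

Run with `python -m unittest -v`, the first test should be reported as `ok` and the second as `skipped 'test_slow_tokenizer is set to False'`, which is the behavior this commit standardizes on.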
@@ -378,8 +378,8 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
         model.generate(input_ids, attention_mask=attention_mask)
         model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
 
+    @unittest.skip(reason="Longformer cannot keep gradients in attentions or hidden states")
     def test_retain_grad_hidden_states_attentions(self):
-        # longformer cannot keep gradients in attentions or hidden states
         return
 
     def test_attention_outputs(self):
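The LED hunk above moves the skip from an in-body comment plus `return` to a decorator on the test method. One practical difference, sketched below with hypothetical test classes (none of this code is from the diff), is when the skip takes effect: a method decorated with `@unittest.skip(...)` is recorded as skipped before `setUp()` runs, whereas `self.skipTest(...)` only fires after `setUp()` has already executed.

```python
# Minimal sketch, assuming stock unittest behavior; class and attribute names are
# illustrative only. DecoratorSkip never runs setUp(), BodySkip does.
import unittest


class DecoratorSkip(unittest.TestCase):
    def setUp(self):
        raise RuntimeError("expensive setup that should never run for a decorator-level skip")

    @unittest.skip(reason="model cannot keep gradients in attentions or hidden states")
    def test_retain_grad(self):
        pass  # never reached, and setUp() is never invoked


class BodySkip(unittest.TestCase):
    def setUp(self):
        self.expensive_resource = object()  # runs even though the test ends up skipped

    def test_retain_grad(self):
        self.skipTest(reason="model cannot keep gradients in attentions or hidden states")


if __name__ == "__main__":
    unittest.main(verbosity=2)
```

When auditing a run, pytest's `-rs` flag (short summary for skipped tests) is one way to surface the `reason=` strings that this commit adds.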
Some files were not shown because too many files have changed in this diff.