diff --git a/utils/check_repo.py b/utils/check_repo.py
index dd990913ed1..b4a7d99a26e 100644
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -93,18 +93,18 @@ IGNORE_NON_TESTED = PRIVATE_MODELS.copy() + [
 # Update this list with test files that don't have a tester with a `all_model_classes` variable and which don't
 # trigger the common tests.
 TEST_FILES_WITH_NO_COMMON_TESTS = [
-    "test_modeling_camembert.py",
-    "test_modeling_flax_mt5.py",
-    "test_modeling_mbart.py",
-    "test_modeling_mt5.py",
-    "test_modeling_pegasus.py",
-    "test_modeling_tf_camembert.py",
-    "test_modeling_tf_mt5.py",
-    "test_modeling_tf_xlm_roberta.py",
-    "test_modeling_xlm_prophetnet.py",
-    "test_modeling_xlm_roberta.py",
-    "test_modeling_vision_text_dual_encoder.py",
-    "test_modeling_flax_vision_text_dual_encoder.py",
+    "camembert/test_modeling_camembert.py",
+    "mt5/test_modeling_flax_mt5.py",
+    "mbart/test_modeling_mbart.py",
+    "mt5/test_modeling_mt5.py",
+    "pegasus/test_modeling_pegasus.py",
+    "camembert/test_modeling_tf_camembert.py",
+    "mt5/test_modeling_tf_mt5.py",
+    "xlm_roberta/test_modeling_tf_xlm_roberta.py",
+    "xlm_prophetnet/test_modeling_xlm_prophetnet.py",
+    "xlm_roberta/test_modeling_xlm_roberta.py",
+    "vision_text_dual_encoder/test_modeling_vision_text_dual_encoder.py",
+    "vision_text_dual_encoder/test_modeling_flax_vision_text_dual_encoder.py",
 ]
 
 # Update this list for models that are not in any of the auto MODEL_XXX_MAPPING. Being in this list is an exception and
@@ -295,13 +295,20 @@ def get_model_test_files():
         "test_modeling_tf_encoder_decoder",
     ]
     test_files = []
-    for filename in os.listdir(PATH_TO_TESTS):
-        if (
-            os.path.isfile(f"{PATH_TO_TESTS}/{filename}")
-            and filename.startswith("test_modeling")
-            and not os.path.splitext(filename)[0] in _ignore_files
-        ):
-            test_files.append(filename)
+    for file_or_dir in os.listdir(PATH_TO_TESTS):
+        path = os.path.join(PATH_TO_TESTS, file_or_dir)
+        if os.path.isdir(path):
+            filenames = [os.path.join(file_or_dir, file) for file in os.listdir(path)]
+        else:
+            filenames = [file_or_dir]
+
+        for filename in filenames:
+            if (
+                os.path.isfile(os.path.join(PATH_TO_TESTS, filename))
+                and "test_modeling" in filename
+                and not os.path.splitext(filename)[0] in _ignore_files
+            ):
+                test_files.append(filename)
 
     return test_files
 
@@ -356,9 +363,13 @@ def check_all_models_are_tested():
     test_files = get_model_test_files()
     failures = []
     for module in modules:
-        test_file = f"test_{module.__name__.split('.')[-1]}.py"
-        if test_file not in test_files:
+        test_file = [file for file in test_files if f"test_{module.__name__.split('.')[-1]}.py" in file]
+        if len(test_file) == 0:
             failures.append(f"{module.__name__} does not have its corresponding test file {test_file}.")
+        elif len(test_file) > 1:
+            failures.append(f"{module.__name__} has several test files: {test_file}.")
+        else:
+            test_file = test_file[0]
             new_failures = check_models_are_tested(module, test_file)
             if new_failures is not None:
                 failures += new_failures
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index e842055d93b..78fb572e16e 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -195,12 +195,24 @@ def get_test_dependencies(test_fname):
         content = f.read()
 
     # Tests only have relative imports for other test files
-    relative_imports = re.findall(r"from\s+\.(\S+)\s+import\s+([^\n]+)\n", content)
+    # TODO Sylvain: handle relative imports cleanly
+    relative_imports = re.findall(r"from\s+(\.\S+)\s+import\s+([^\n]+)\n", content)
     relative_imports = [test for test, imp in relative_imports if "# tests_ignore" not in imp]
-    dependencies = [os.path.join("tests", f"{test}.py") for test in relative_imports]
-    # Some tests use docstrings with relative imports in them and this could catch them, so we check the files
-    # actually exist
-    return [f for f in dependencies if os.path.isfile(f)]
+
+    # Removes the double trailing '..' for parent imports, and creates an absolute path from the root dir with
+    # `tests` as a prefix.
+    parent_imports = [imp.strip(".") for imp in relative_imports if ".." in imp]
+    parent_imports = [os.path.join("tests", f"{test.replace('.', os.path.sep)}.py") for test in parent_imports]
+
+    # Removes the single trailing '.' for current dir imports, and creates an absolute path from the root dir with
+    # tests/{module_name} as a prefix.
+    current_dir_imports = [imp.strip(".") for imp in relative_imports if ".." not in imp]
+    directory = os.path.sep.join(test_fname.split(os.path.sep)[:-1])
+    current_dir_imports = [
+        os.path.join(directory, f"{test.replace('.', os.path.sep)}.py") for test in current_dir_imports
+    ]
+
+    return [f for f in [*parent_imports, *current_dir_imports] if os.path.isfile(f)]
 
 
 def create_reverse_dependency_map():
@@ -227,6 +239,8 @@ def create_reverse_dependency_map():
     something_changed = False
     for m in all_files:
         for d in direct_deps[m]:
+            if d not in direct_deps:
+                raise ValueError(f"KeyError:{d}. From {m}")
             for dep in direct_deps[d]:
                 if dep not in direct_deps[m]:
                     direct_deps[m].append(dep)
@@ -246,47 +260,55 @@
 # Any module file that has a test name which can't be inferred automatically from its name should go here. A better
 # approach is to (re-)name the test file accordingly, and second best to add the correspondence map here.
 SPECIAL_MODULE_TO_TEST_MAP = {
-    "commands/add_new_model_like.py": "test_add_new_model_like.py",
+    "commands/add_new_model_like.py": "utils/test_add_new_model_like.py",
     "configuration_utils.py": "test_configuration_common.py",
-    "convert_graph_to_onnx.py": "test_onnx.py",
-    "data/data_collator.py": "test_data_collator.py",
+    "convert_graph_to_onnx.py": "onnx/test_onnx.py",
+    "data/data_collator.py": "trainer/test_data_collator.py",
     "deepspeed.py": "deepspeed/",
     "feature_extraction_sequence_utils.py": "test_sequence_feature_extraction_common.py",
     "feature_extraction_utils.py": "test_feature_extraction_common.py",
-    "file_utils.py": ["test_file_utils.py", "test_model_output.py"],
-    "modelcard.py": "test_model_card.py",
+    "file_utils.py": ["utils/test_file_utils.py", "utils/test_model_output.py"],
+    "modelcard.py": "utils/test_model_card.py",
     "modeling_flax_utils.py": "test_modeling_flax_common.py",
-    "modeling_tf_utils.py": ["test_modeling_tf_common.py", "test_modeling_tf_core.py"],
-    "modeling_utils.py": ["test_modeling_common.py", "test_offline.py"],
-    "models/auto/modeling_auto.py": ["test_modeling_auto.py", "test_modeling_tf_pytorch.py", "test_modeling_bort.py"],
-    "models/auto/modeling_flax_auto.py": "test_modeling_flax_auto.py",
+    "modeling_tf_utils.py": ["test_modeling_tf_common.py", "utils/test_modeling_tf_core.py"],
+    "modeling_utils.py": ["test_modeling_common.py", "utils/test_offline.py"],
+    "models/auto/modeling_auto.py": [
+        "auto/test_modeling_auto.py",
+        "auto/test_modeling_tf_pytorch.py",
+        "bort/test_modeling_bort.py",
+    ],
+    "models/auto/modeling_flax_auto.py": "auto/test_modeling_flax_auto.py",
     "models/auto/modeling_tf_auto.py": [
-        "test_modeling_tf_auto.py",
-        "test_modeling_tf_pytorch.py",
-        "test_modeling_tf_bort.py",
+        "auto/test_modeling_tf_auto.py",
+        "auto/test_modeling_tf_pytorch.py",
+        "bort/test_modeling_tf_bort.py",
     ],
-    "models/blenderbot_small/tokenization_blenderbot_small.py": "test_tokenization_small_blenderbot.py",
-    "models/blenderbot_small/tokenization_blenderbot_small_fast.py": "test_tokenization_small_blenderbot.py",
-    "models/gpt2/modeling_gpt2.py": ["test_modeling_gpt2.py", "test_modeling_megatron_gpt2.py"],
-    "pipelines/base.py": "test_pipelines_*.py",
+    "models/gpt2/modeling_gpt2.py": ["gpt2/test_modeling_gpt2.py", "megatron_gpt2/test_modeling_megatron_gpt2.py"],
+    "optimization.py": "optimization/test_optimization.py",
+    "optimization_tf.py": "optimization/test_optimization_tf.py",
+    "pipelines/base.py": "pipelines/test_pipelines_*.py",
     "pipelines/text2text_generation.py": [
-        "test_pipelines_text2text_generation.py",
-        "test_pipelines_summarization.py",
-        "test_pipelines_translation.py",
+        "pipelines/test_pipelines_text2text_generation.py",
+        "pipelines/test_pipelines_summarization.py",
+        "pipelines/test_pipelines_translation.py",
+    ],
+    "pipelines/zero_shot_classification.py": "pipelines/test_pipelines_zero_shot.py",
+    "testing_utils.py": "utils/test_skip_decorators.py",
+    "tokenization_utils.py": ["test_tokenization_common.py", "tokenization/test_tokenization_utils.py"],
+    "tokenization_utils_base.py": ["test_tokenization_common.py", "tokenization/test_tokenization_utils.py"],
+    "tokenization_utils_fast.py": [
+        "test_tokenization_common.py",
+        "tokenization/test_tokenization_utils.py",
+        "tokenization/test_tokenization_fast.py",
     ],
-    "pipelines/zero_shot_classification.py": "test_pipelines_zero_shot.py",
-    "testing_utils.py": "test_skip_decorators.py",
-    "tokenization_utils.py": "test_tokenization_common.py",
-    "tokenization_utils_base.py": "test_tokenization_common.py",
-    "tokenization_utils_fast.py": "test_tokenization_fast.py",
     "trainer.py": [
-        "test_trainer.py",
+        "trainer/test_trainer.py",
         "extended/test_trainer_ext.py",
-        "test_trainer_distributed.py",
-        "test_trainer_tpu.py",
+        "trainer/test_trainer_distributed.py",
+        "trainer/test_trainer_tpu.py",
     ],
-    "train_pt_utils.py": "test_trainer_utils.py",
-    "utils/versions.py": "test_versions_utils.py",
+    "train_pt_utils.py": "trainer/test_trainer_utils.py",
+    "utils/versions.py": "utils/test_versions_utils.py",
 }
 
 
@@ -311,21 +333,27 @@ def module_to_test_file(module_fname):
 
     # Special case for pipelines submodules
     if len(splits) >= 2 and splits[-2] == "pipelines":
-        default_test_file = f"tests/test_pipelines_{module_name}"
+        default_test_file = f"tests/pipelines/test_pipelines_{module_name}"
     # Special case for benchmarks submodules
     elif len(splits) >= 2 and splits[-2] == "benchmark":
-        return ["tests/test_benchmark.py", "tests/test_benchmark_tf.py"]
+        return ["tests/benchmark/test_benchmark.py", "tests/benchmark/test_benchmark_tf.py"]
     # Special case for commands submodules
     elif len(splits) >= 2 and splits[-2] == "commands":
-        return "tests/test_cli.py"
+        return "tests/utils/test_cli.py"
     # Special case for onnx submodules
     elif len(splits) >= 2 and splits[-2] == "onnx":
-        return ["tests/test_onnx.py", "tests/test_onnx_v2.py"]
+        return ["tests/onnx/test_onnx.py", "tests/onnx/test_onnx_v2.py"]
     # Special case for utils (not the one in src/transformers, the ones at the root of the repo).
     elif len(splits) > 0 and splits[0] == "utils":
-        default_test_file = f"tests/test_utils_{module_name}"
+        default_test_file = f"tests/utils/test_utils_{module_name}"
+    elif len(splits) > 4 and splits[2] == "models":
+        default_test_file = f"tests/{splits[3]}/test_{module_name}"
+    elif len(splits) > 2 and splits[2].startswith("generation"):
+        default_test_file = f"tests/generation/test_{module_name}"
+    elif len(splits) > 2 and splits[2].startswith("trainer"):
+        default_test_file = f"tests/trainer/test_{module_name}"
     else:
-        default_test_file = f"tests/test_{module_name}"
+        default_test_file = f"tests/utils/test_{module_name}"
 
     if os.path.isfile(default_test_file):
         return default_test_file
@@ -340,8 +368,8 @@
 # This list contains the list of test files we expect never to be launched from a change in a module/util. Those are
 # launched separately.
 EXPECTED_TEST_FILES_NEVER_TOUCHED = [
-    "tests/test_doc_samples.py",  # Doc tests
-    "tests/test_pipelines_common.py",  # Actually checked by the pipeline based file
+    "tests/utils/test_doc_samples.py",  # Doc tests
+    "tests/pipelines/test_pipelines_common.py",  # Actually checked by the pipeline based file
     "tests/sagemaker/test_single_node_gpu.py",  # SageMaker test
     "tests/sagemaker/test_multi_node_model_parallel.py",  # SageMaker test
     "tests/sagemaker/test_multi_node_data_parallel.py",  # SageMaker test
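For reference, a minimal standalone sketch (not part of the patch) of how the reworked relative-import handling in get_test_dependencies resolves imports under the new per-model test layout: '..' imports map to files directly under tests/, while '.' imports map to files next to the test itself. The some_local_helper module below is invented for the example, and the real function additionally drops any resolved path that does not exist on disk.

    import os
    import re

    # Hypothetical test file living in the new per-model folder layout.
    test_fname = os.path.join("tests", "bert", "test_modeling_bert.py")
    content = (
        "from ..test_modeling_common import ModelTesterMixin\n"  # parent-dir import
        "from .some_local_helper import Helper\n"  # same-dir import (made up)
    )

    # Same regex and "# tests_ignore" filter as the patched function.
    relative_imports = re.findall(r"from\s+(\.\S+)\s+import\s+([^\n]+)\n", content)
    relative_imports = [test for test, imp in relative_imports if "# tests_ignore" not in imp]

    # Parent-dir ('..') imports resolve under tests/ directly.
    parent_imports = [
        os.path.join("tests", f"{mod.strip('.').replace('.', os.path.sep)}.py")
        for mod in relative_imports
        if ".." in mod
    ]
    # Same-dir ('.') imports resolve next to the test file.
    directory = os.path.sep.join(test_fname.split(os.path.sep)[:-1])
    current_dir_imports = [
        os.path.join(directory, f"{mod.strip('.').replace('.', os.path.sep)}.py")
        for mod in relative_imports
        if ".." not in mod
    ]

    print(parent_imports)       # ['tests/test_modeling_common.py'] (on POSIX)
    print(current_dir_imports)  # ['tests/bert/some_local_helper.py']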