Import structure & first three model refactors (#31329)

* Import structure & first three model refactors

* Register -> Export. Export all in __all__. Sensible defaults according to filename.

* Apply most comments from Amy and some comments from Lucain

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Lucain Pouget <lucainp@gmail.com>

* Style

* Add comment

* Clearer .py management

* Raise if not in backend mapping

* More specific type

* More efficient listdir

* Misc fixes

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Lucain Pouget <lucainp@gmail.com>
Lysandre Debut 2024-09-10 11:10:53 +02:00 committed by GitHub
parent 7f112caac2
commit f24f084329
30 changed files with 937 additions and 553 deletions
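
At its core, the change makes each module file declare its own public objects in `__all__` (plus an `@export` decorator where extra backends are needed), while the per-model `__init__.py` collapses to a lazy-module registration derived from those declarations. A minimal sketch of the new pattern, with illustrative file and class names:

# modeling_mymodel.py — a "torch" default backend is inferred from the filename
__all__ = ["MyModel"]

# tokenization_mymodel.py — additional backends are declared with @export
from ...utils.import_utils import export

@export(backends=("sentencepiece",))
class MyTokenizer(PreTrainedTokenizer):
    ...

__all__ = ["MyTokenizer"]

# __init__.py — two lines replace the old hand-maintained import structure
import sys
from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure

if TYPE_CHECKING:
    from .modeling_mymodel import *
    from .tokenization_mymodel import *
else:
    _file = globals()["__file__"]
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)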

View File

@ -53,7 +53,6 @@ quality:
@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
ruff check $(check_dirs) setup.py conftest.py
ruff format --check $(check_dirs) setup.py conftest.py
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py
python utils/check_docstrings.py --check_all
@ -62,7 +61,6 @@ quality:
# Format source code automatically and check if there are any problems left that need manual fixing
extra_style_checks:
python utils/custom_init_isort.py
python utils/sort_auto_mappings.py
python utils/check_doc_toc.py --fix_and_overwrite

View File

@ -1500,7 +1500,6 @@ else:
"BertForQuestionAnswering",
"BertForSequenceClassification",
"BertForTokenClassification",
"BertLayer",
"BertLMHeadModel",
"BertModel",
"BertPreTrainedModel",
@ -1524,7 +1523,6 @@ else:
"BigBirdForQuestionAnswering",
"BigBirdForSequenceClassification",
"BigBirdForTokenClassification",
"BigBirdLayer",
"BigBirdModel",
"BigBirdPreTrainedModel",
"load_tf_weights_in_big_bird",
@ -1643,7 +1641,6 @@ else:
"CanineForQuestionAnswering",
"CanineForSequenceClassification",
"CanineForTokenClassification",
"CanineLayer",
"CanineModel",
"CaninePreTrainedModel",
"load_tf_weights_in_canine",
@ -1730,7 +1727,6 @@ else:
"ConvBertForQuestionAnswering",
"ConvBertForSequenceClassification",
"ConvBertForTokenClassification",
"ConvBertLayer",
"ConvBertModel",
"ConvBertPreTrainedModel",
"load_tf_weights_in_convbert",
@ -1959,7 +1955,6 @@ else:
"QDQBertForQuestionAnswering",
"QDQBertForSequenceClassification",
"QDQBertForTokenClassification",
"QDQBertLayer",
"QDQBertLMHeadModel",
"QDQBertModel",
"QDQBertPreTrainedModel",
@ -2211,7 +2206,6 @@ else:
"FNetForQuestionAnswering",
"FNetForSequenceClassification",
"FNetForTokenClassification",
"FNetLayer",
"FNetModel",
"FNetPreTrainedModel",
]
@ -2312,7 +2306,6 @@ else:
"GPTNeoXForQuestionAnswering",
"GPTNeoXForSequenceClassification",
"GPTNeoXForTokenClassification",
"GPTNeoXLayer",
"GPTNeoXModel",
"GPTNeoXPreTrainedModel",
]
@ -2320,7 +2313,6 @@ else:
_import_structure["models.gpt_neox_japanese"].extend(
[
"GPTNeoXJapaneseForCausalLM",
"GPTNeoXJapaneseLayer",
"GPTNeoXJapaneseModel",
"GPTNeoXJapanesePreTrainedModel",
]
@ -2552,7 +2544,6 @@ else:
"LongformerForTokenClassification",
"LongformerModel",
"LongformerPreTrainedModel",
"LongformerSelfAttention",
]
)
_import_structure["models.longt5"].extend(
@ -2585,7 +2576,6 @@ else:
"LxmertModel",
"LxmertPreTrainedModel",
"LxmertVisualFeatureEncoder",
"LxmertXLayer",
]
)
_import_structure["models.m2m_100"].extend(
@ -2609,7 +2599,9 @@ else:
"Mamba2PreTrainedModel",
]
)
_import_structure["models.marian"].extend(["MarianForCausalLM", "MarianModel", "MarianMTModel"])
_import_structure["models.marian"].extend(
["MarianForCausalLM", "MarianModel", "MarianMTModel", "MarianPreTrainedModel"]
)
_import_structure["models.markuplm"].extend(
[
"MarkupLMForQuestionAnswering",
@ -2692,7 +2684,6 @@ else:
"MobileBertForQuestionAnswering",
"MobileBertForSequenceClassification",
"MobileBertForTokenClassification",
"MobileBertLayer",
"MobileBertModel",
"MobileBertPreTrainedModel",
"load_tf_weights_in_mobilebert",
@ -2738,7 +2729,6 @@ else:
"MPNetForQuestionAnswering",
"MPNetForSequenceClassification",
"MPNetForTokenClassification",
"MPNetLayer",
"MPNetModel",
"MPNetPreTrainedModel",
]
@ -2828,7 +2818,6 @@ else:
"NystromformerForQuestionAnswering",
"NystromformerForSequenceClassification",
"NystromformerForTokenClassification",
"NystromformerLayer",
"NystromformerModel",
"NystromformerPreTrainedModel",
]
@ -2942,7 +2931,6 @@ else:
"PerceiverForMultimodalAutoencoding",
"PerceiverForOpticalFlow",
"PerceiverForSequenceClassification",
"PerceiverLayer",
"PerceiverModel",
"PerceiverPreTrainedModel",
]
@ -3078,11 +3066,9 @@ else:
)
_import_structure["models.reformer"].extend(
[
"ReformerAttention",
"ReformerForMaskedLM",
"ReformerForQuestionAnswering",
"ReformerForSequenceClassification",
"ReformerLayer",
"ReformerModel",
"ReformerModelWithLMHead",
"ReformerPreTrainedModel",
@ -3103,7 +3089,6 @@ else:
"RemBertForQuestionAnswering",
"RemBertForSequenceClassification",
"RemBertForTokenClassification",
"RemBertLayer",
"RemBertModel",
"RemBertPreTrainedModel",
"load_tf_weights_in_rembert",
@ -3150,7 +3135,6 @@ else:
"RoCBertForQuestionAnswering",
"RoCBertForSequenceClassification",
"RoCBertForTokenClassification",
"RoCBertLayer",
"RoCBertModel",
"RoCBertPreTrainedModel",
"load_tf_weights_in_roc_bert",
@ -3164,7 +3148,6 @@ else:
"RoFormerForQuestionAnswering",
"RoFormerForSequenceClassification",
"RoFormerForTokenClassification",
"RoFormerLayer",
"RoFormerModel",
"RoFormerPreTrainedModel",
"load_tf_weights_in_roformer",
@ -3221,7 +3204,6 @@ else:
"SegformerDecodeHead",
"SegformerForImageClassification",
"SegformerForSemanticSegmentation",
"SegformerLayer",
"SegformerModel",
"SegformerPreTrainedModel",
]
@ -3280,7 +3262,6 @@ else:
[
"SplinterForPreTraining",
"SplinterForQuestionAnswering",
"SplinterLayer",
"SplinterModel",
"SplinterPreTrainedModel",
]
@ -3293,7 +3274,6 @@ else:
"SqueezeBertForSequenceClassification",
"SqueezeBertForTokenClassification",
"SqueezeBertModel",
"SqueezeBertModule",
"SqueezeBertPreTrainedModel",
]
)
@ -3492,7 +3472,6 @@ else:
"ViltForMaskedLM",
"ViltForQuestionAnswering",
"ViltForTokenClassification",
"ViltLayer",
"ViltModel",
"ViltPreTrainedModel",
]
@ -3512,7 +3491,6 @@ else:
"VisualBertForQuestionAnswering",
"VisualBertForRegionToPhraseAlignment",
"VisualBertForVisualReasoning",
"VisualBertLayer",
"VisualBertModel",
"VisualBertPreTrainedModel",
]
@ -3528,7 +3506,6 @@ else:
_import_structure["models.vit_mae"].extend(
[
"ViTMAEForPreTraining",
"ViTMAELayer",
"ViTMAEModel",
"ViTMAEPreTrainedModel",
]
@ -3708,7 +3685,6 @@ else:
"YosoForQuestionAnswering",
"YosoForSequenceClassification",
"YosoForTokenClassification",
"YosoLayer",
"YosoModel",
"YosoPreTrainedModel",
]
@ -3855,7 +3831,6 @@ else:
)
_import_structure["models.bert"].extend(
[
"TFBertEmbeddings",
"TFBertForMaskedLM",
"TFBertForMultipleChoice",
"TFBertForNextSentencePrediction",
@ -3921,7 +3896,6 @@ else:
"TFConvBertForQuestionAnswering",
"TFConvBertForSequenceClassification",
"TFConvBertForTokenClassification",
"TFConvBertLayer",
"TFConvBertModel",
"TFConvBertPreTrainedModel",
]
@ -4152,7 +4126,6 @@ else:
"TFLongformerForTokenClassification",
"TFLongformerModel",
"TFLongformerPreTrainedModel",
"TFLongformerSelfAttention",
]
)
_import_structure["models.lxmert"].extend(
@ -4253,7 +4226,6 @@ else:
"TFRemBertForQuestionAnswering",
"TFRemBertForSequenceClassification",
"TFRemBertForTokenClassification",
"TFRemBertLayer",
"TFRemBertModel",
"TFRemBertPreTrainedModel",
]
@ -4299,7 +4271,6 @@ else:
"TFRoFormerForQuestionAnswering",
"TFRoFormerForSequenceClassification",
"TFRoFormerForTokenClassification",
"TFRoFormerLayer",
"TFRoFormerModel",
"TFRoFormerPreTrainedModel",
]
@ -5829,7 +5800,8 @@ if TYPE_CHECKING:
from .models.llama import LlamaTokenizer
from .models.m2m_100 import M2M100Tokenizer
from .models.marian import MarianTokenizer
from .models.mbart import MBart50Tokenizer, MBartTokenizer
from .models.mbart import MBartTokenizer
from .models.mbart50 import MBart50Tokenizer
from .models.mluke import MLukeTokenizer
from .models.mt5 import MT5Tokenizer
from .models.nllb import NllbTokenizer
@ -6300,7 +6272,6 @@ if TYPE_CHECKING:
BertForQuestionAnswering,
BertForSequenceClassification,
BertForTokenClassification,
BertLayer,
BertLMHeadModel,
BertModel,
BertPreTrainedModel,
@ -6320,7 +6291,6 @@ if TYPE_CHECKING:
BigBirdForQuestionAnswering,
BigBirdForSequenceClassification,
BigBirdForTokenClassification,
BigBirdLayer,
BigBirdModel,
BigBirdPreTrainedModel,
load_tf_weights_in_big_bird,
@ -6415,7 +6385,6 @@ if TYPE_CHECKING:
CanineForQuestionAnswering,
CanineForSequenceClassification,
CanineForTokenClassification,
CanineLayer,
CanineModel,
CaninePreTrainedModel,
load_tf_weights_in_canine,
@ -6488,7 +6457,6 @@ if TYPE_CHECKING:
ConvBertForQuestionAnswering,
ConvBertForSequenceClassification,
ConvBertForTokenClassification,
ConvBertLayer,
ConvBertModel,
ConvBertPreTrainedModel,
load_tf_weights_in_convbert,
@ -6673,7 +6641,6 @@ if TYPE_CHECKING:
QDQBertForQuestionAnswering,
QDQBertForSequenceClassification,
QDQBertForTokenClassification,
QDQBertLayer,
QDQBertLMHeadModel,
QDQBertModel,
QDQBertPreTrainedModel,
@ -6872,7 +6839,6 @@ if TYPE_CHECKING:
FNetForQuestionAnswering,
FNetForSequenceClassification,
FNetForTokenClassification,
FNetLayer,
FNetModel,
FNetPreTrainedModel,
)
@ -6960,13 +6926,11 @@ if TYPE_CHECKING:
GPTNeoXForQuestionAnswering,
GPTNeoXForSequenceClassification,
GPTNeoXForTokenClassification,
GPTNeoXLayer,
GPTNeoXModel,
GPTNeoXPreTrainedModel,
)
from .models.gpt_neox_japanese import (
GPTNeoXJapaneseForCausalLM,
GPTNeoXJapaneseLayer,
GPTNeoXJapaneseModel,
GPTNeoXJapanesePreTrainedModel,
)
@ -7142,7 +7106,6 @@ if TYPE_CHECKING:
LongformerForTokenClassification,
LongformerModel,
LongformerPreTrainedModel,
LongformerSelfAttention,
)
from .models.longt5 import (
LongT5EncoderModel,
@ -7169,7 +7132,6 @@ if TYPE_CHECKING:
LxmertModel,
LxmertPreTrainedModel,
LxmertVisualFeatureEncoder,
LxmertXLayer,
)
from .models.m2m_100 import (
M2M100ForConditionalGeneration,
@ -7186,7 +7148,7 @@ if TYPE_CHECKING:
Mamba2Model,
Mamba2PreTrainedModel,
)
from .models.marian import MarianForCausalLM, MarianModel, MarianMTModel
from .models.marian import MarianForCausalLM, MarianModel, MarianMTModel, MarianPreTrainedModel
from .models.markuplm import (
MarkupLMForQuestionAnswering,
MarkupLMForSequenceClassification,
@ -7252,7 +7214,6 @@ if TYPE_CHECKING:
MobileBertForQuestionAnswering,
MobileBertForSequenceClassification,
MobileBertForTokenClassification,
MobileBertLayer,
MobileBertModel,
MobileBertPreTrainedModel,
load_tf_weights_in_mobilebert,
@ -7288,7 +7249,6 @@ if TYPE_CHECKING:
MPNetForQuestionAnswering,
MPNetForSequenceClassification,
MPNetForTokenClassification,
MPNetLayer,
MPNetModel,
MPNetPreTrainedModel,
)
@ -7360,7 +7320,6 @@ if TYPE_CHECKING:
NystromformerForQuestionAnswering,
NystromformerForSequenceClassification,
NystromformerForTokenClassification,
NystromformerLayer,
NystromformerModel,
NystromformerPreTrainedModel,
)
@ -7448,7 +7407,6 @@ if TYPE_CHECKING:
PerceiverForMultimodalAutoencoding,
PerceiverForOpticalFlow,
PerceiverForSequenceClassification,
PerceiverLayer,
PerceiverModel,
PerceiverPreTrainedModel,
)
@ -7550,11 +7508,9 @@ if TYPE_CHECKING:
RecurrentGemmaPreTrainedModel,
)
from .models.reformer import (
ReformerAttention,
ReformerForMaskedLM,
ReformerForQuestionAnswering,
ReformerForSequenceClassification,
ReformerLayer,
ReformerModel,
ReformerModelWithLMHead,
ReformerPreTrainedModel,
@ -7571,7 +7527,6 @@ if TYPE_CHECKING:
RemBertForQuestionAnswering,
RemBertForSequenceClassification,
RemBertForTokenClassification,
RemBertLayer,
RemBertModel,
RemBertPreTrainedModel,
load_tf_weights_in_rembert,
@ -7610,7 +7565,6 @@ if TYPE_CHECKING:
RoCBertForQuestionAnswering,
RoCBertForSequenceClassification,
RoCBertForTokenClassification,
RoCBertLayer,
RoCBertModel,
RoCBertPreTrainedModel,
load_tf_weights_in_roc_bert,
@ -7622,7 +7576,6 @@ if TYPE_CHECKING:
RoFormerForQuestionAnswering,
RoFormerForSequenceClassification,
RoFormerForTokenClassification,
RoFormerLayer,
RoFormerModel,
RoFormerPreTrainedModel,
load_tf_weights_in_roformer,
@ -7667,7 +7620,6 @@ if TYPE_CHECKING:
SegformerDecodeHead,
SegformerForImageClassification,
SegformerForSemanticSegmentation,
SegformerLayer,
SegformerModel,
SegformerPreTrainedModel,
)
@ -7712,7 +7664,6 @@ if TYPE_CHECKING:
from .models.splinter import (
SplinterForPreTraining,
SplinterForQuestionAnswering,
SplinterLayer,
SplinterModel,
SplinterPreTrainedModel,
)
@ -7723,7 +7674,6 @@ if TYPE_CHECKING:
SqueezeBertForSequenceClassification,
SqueezeBertForTokenClassification,
SqueezeBertModel,
SqueezeBertModule,
SqueezeBertPreTrainedModel,
)
from .models.stablelm import (
@ -7872,7 +7822,6 @@ if TYPE_CHECKING:
ViltForMaskedLM,
ViltForQuestionAnswering,
ViltForTokenClassification,
ViltLayer,
ViltModel,
ViltPreTrainedModel,
)
@ -7888,7 +7837,6 @@ if TYPE_CHECKING:
VisualBertForQuestionAnswering,
VisualBertForRegionToPhraseAlignment,
VisualBertForVisualReasoning,
VisualBertLayer,
VisualBertModel,
VisualBertPreTrainedModel,
)
@ -7900,7 +7848,6 @@ if TYPE_CHECKING:
)
from .models.vit_mae import (
ViTMAEForPreTraining,
ViTMAELayer,
ViTMAEModel,
ViTMAEPreTrainedModel,
)
@ -8042,7 +7989,6 @@ if TYPE_CHECKING:
YosoForQuestionAnswering,
YosoForSequenceClassification,
YosoForTokenClassification,
YosoLayer,
YosoModel,
YosoPreTrainedModel,
)
@ -8176,7 +8122,6 @@ if TYPE_CHECKING:
TFBartPretrainedModel,
)
from .models.bert import (
TFBertEmbeddings,
TFBertForMaskedLM,
TFBertForMultipleChoice,
TFBertForNextSentencePrediction,
@ -8230,7 +8175,6 @@ if TYPE_CHECKING:
TFConvBertForQuestionAnswering,
TFConvBertForSequenceClassification,
TFConvBertForTokenClassification,
TFConvBertLayer,
TFConvBertModel,
TFConvBertPreTrainedModel,
)
@ -8415,7 +8359,6 @@ if TYPE_CHECKING:
TFLongformerForTokenClassification,
TFLongformerModel,
TFLongformerPreTrainedModel,
TFLongformerSelfAttention,
)
from .models.lxmert import (
TFLxmertForPreTraining,
@ -8505,7 +8448,6 @@ if TYPE_CHECKING:
TFRemBertForQuestionAnswering,
TFRemBertForSequenceClassification,
TFRemBertForTokenClassification,
TFRemBertLayer,
TFRemBertModel,
TFRemBertPreTrainedModel,
)
@ -8543,7 +8485,6 @@ if TYPE_CHECKING:
TFRoFormerForQuestionAnswering,
TFRoFormerForSequenceClassification,
TFRoFormerForTokenClassification,
TFRoFormerLayer,
TFRoFormerModel,
TFRoFormerPreTrainedModel,
)

View File

@ -11,165 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_flax_available,
is_sentencepiece_available,
is_tf_available,
is_tokenizers_available,
is_torch_available,
)
from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure
_import_structure = {
"configuration_albert": ["AlbertConfig", "AlbertOnnxConfig"],
}
try:
if not is_sentencepiece_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["tokenization_albert"] = ["AlbertTokenizer"]
try:
if not is_tokenizers_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["tokenization_albert_fast"] = ["AlbertTokenizerFast"]
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_albert"] = [
"AlbertForMaskedLM",
"AlbertForMultipleChoice",
"AlbertForPreTraining",
"AlbertForQuestionAnswering",
"AlbertForSequenceClassification",
"AlbertForTokenClassification",
"AlbertModel",
"AlbertPreTrainedModel",
"load_tf_weights_in_albert",
]
try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_tf_albert"] = [
"TFAlbertForMaskedLM",
"TFAlbertForMultipleChoice",
"TFAlbertForPreTraining",
"TFAlbertForQuestionAnswering",
"TFAlbertForSequenceClassification",
"TFAlbertForTokenClassification",
"TFAlbertMainLayer",
"TFAlbertModel",
"TFAlbertPreTrainedModel",
]
try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_flax_albert"] = [
"FlaxAlbertForMaskedLM",
"FlaxAlbertForMultipleChoice",
"FlaxAlbertForPreTraining",
"FlaxAlbertForQuestionAnswering",
"FlaxAlbertForSequenceClassification",
"FlaxAlbertForTokenClassification",
"FlaxAlbertModel",
"FlaxAlbertPreTrainedModel",
]
if TYPE_CHECKING:
from .configuration_albert import AlbertConfig, AlbertOnnxConfig
try:
if not is_sentencepiece_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .tokenization_albert import AlbertTokenizer
try:
if not is_tokenizers_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .tokenization_albert_fast import AlbertTokenizerFast
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_albert import (
AlbertForMaskedLM,
AlbertForMultipleChoice,
AlbertForPreTraining,
AlbertForQuestionAnswering,
AlbertForSequenceClassification,
AlbertForTokenClassification,
AlbertModel,
AlbertPreTrainedModel,
load_tf_weights_in_albert,
)
try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_tf_albert import (
TFAlbertForMaskedLM,
TFAlbertForMultipleChoice,
TFAlbertForPreTraining,
TFAlbertForQuestionAnswering,
TFAlbertForSequenceClassification,
TFAlbertForTokenClassification,
TFAlbertMainLayer,
TFAlbertModel,
TFAlbertPreTrainedModel,
)
try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_flax_albert import (
FlaxAlbertForMaskedLM,
FlaxAlbertForMultipleChoice,
FlaxAlbertForPreTraining,
FlaxAlbertForQuestionAnswering,
FlaxAlbertForSequenceClassification,
FlaxAlbertForTokenClassification,
FlaxAlbertModel,
FlaxAlbertPreTrainedModel,
)
from .configuration_albert import *
from .modeling_albert import *
from .modeling_flax_albert import *
from .modeling_tf_albert import *
from .tokenization_albert import *
from .tokenization_albert_fast import *
else:
import sys
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
_file = globals()["__file__"]
sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
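
For reference, this is roughly the structure `define_import_structure(_file)` should assemble for the albert folder, given the files in this diff and the filename-based backend defaults described later in `import_utils.py` (object lists abridged):

{
    frozenset(): {
        "configuration_albert": {"AlbertConfig", "AlbertOnnxConfig"},
    },
    frozenset({"sentencepiece"}): {
        "tokenization_albert": {"AlbertTokenizer"},
    },
    frozenset({"tokenizers"}): {
        "tokenization_albert_fast": {"AlbertTokenizerFast"},
    },
    frozenset({"torch"}): {
        "modeling_albert": {"AlbertModel", "AlbertPreTrainedModel", ...},
    },
    frozenset({"tf"}): {
        "modeling_tf_albert": {"TFAlbertModel", ...},
    },
    frozenset({"flax"}): {
        "modeling_flax_albert": {"FlaxAlbertModel", ...},
    },
}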

View File

@ -165,3 +165,6 @@ class AlbertOnnxConfig(OnnxConfig):
("token_type_ids", dynamic_axis),
]
)
__all__ = ["AlbertConfig", "AlbertOnnxConfig"]

View File

@ -1466,3 +1466,16 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel):
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
__all__ = [
"load_tf_weights_in_albert",
"AlbertPreTrainedModel",
"AlbertModel",
"AlbertForPreTraining",
"AlbertForMaskedLM",
"AlbertForSequenceClassification",
"AlbertForTokenClassification",
"AlbertForQuestionAnswering",
"AlbertForMultipleChoice",
]

View File

@ -1119,3 +1119,14 @@ append_call_sample_docstring(
FlaxQuestionAnsweringModelOutput,
_CONFIG_FOR_DOC,
)
__all__ = [
"FlaxAlbertPreTrainedModel",
"FlaxAlbertModel",
"FlaxAlbertForPreTraining",
"FlaxAlbertForMaskedLM",
"FlaxAlbertForSequenceClassification",
"FlaxAlbertForMultipleChoice",
"FlaxAlbertForTokenClassification",
"FlaxAlbertForQuestionAnswering",
]

View File

@ -1558,3 +1558,16 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
if getattr(self, "classifier", None) is not None:
with tf.name_scope(self.classifier.name):
self.classifier.build([None, None, self.config.hidden_size])
__all__ = [
"TFAlbertPreTrainedModel",
"TFAlbertModel",
"TFAlbertForPreTraining",
"TFAlbertForMaskedLM",
"TFAlbertForSequenceClassification",
"TFAlbertForTokenClassification",
"TFAlbertForQuestionAnswering",
"TFAlbertForMultipleChoice",
"TFAlbertMainLayer",
]

View File

@ -23,6 +23,7 @@ import sentencepiece as spm
from ...tokenization_utils import AddedToken, PreTrainedTokenizer
from ...utils import logging
from ...utils.import_utils import export
logger = logging.get_logger(__name__)
@ -32,6 +33,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}
SPIECE_UNDERLINE = ""
@export(backends=("sentencepiece",))
class AlbertTokenizer(PreTrainedTokenizer):
"""
Construct an ALBERT tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece).
@ -343,3 +345,6 @@ class AlbertTokenizer(PreTrainedTokenizer):
fi.write(content_spiece_model)
return (out_vocab_file,)
__all__ = ["AlbertTokenizer"]

View File

@ -207,3 +207,6 @@ class AlbertTokenizerFast(PreTrainedTokenizerFast):
copyfile(self.vocab_file, out_vocab_file)
return (out_vocab_file,)
__all__ = ["AlbertTokenizerFast"]

View File

@ -13,57 +13,16 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_torch_available,
)
from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure
_import_structure = {
"configuration_align": [
"AlignConfig",
"AlignTextConfig",
"AlignVisionConfig",
],
"processing_align": ["AlignProcessor"],
}
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_align"] = [
"AlignModel",
"AlignPreTrainedModel",
"AlignTextModel",
"AlignVisionModel",
]
if TYPE_CHECKING:
from .configuration_align import (
AlignConfig,
AlignTextConfig,
AlignVisionConfig,
)
from .processing_align import AlignProcessor
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_align import (
AlignModel,
AlignPreTrainedModel,
AlignTextModel,
AlignVisionModel,
)
from .configuration_align import *
from .modeling_align import *
from .processing_align import *
else:
import sys
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
_file = globals()["__file__"]
sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)

View File

@ -378,3 +378,6 @@ class AlignConfig(PretrainedConfig):
"""
return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs)
__all__ = ["AlignTextConfig", "AlignVisionConfig", "AlignConfig"]

View File

@ -1636,3 +1636,6 @@ class AlignModel(AlignPreTrainedModel):
text_model_output=text_outputs,
vision_model_output=vision_outputs,
)
__all__ = ["AlignPreTrainedModel", "AlignTextModel", "AlignVisionModel", "AlignModel"]

View File

@ -162,3 +162,6 @@ class AlignProcessor(ProcessorMixin):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
__all__ = ["AlignProcessor"]

View File

@ -13,55 +13,16 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
_import_structure = {
"configuration_altclip": [
"AltCLIPConfig",
"AltCLIPTextConfig",
"AltCLIPVisionConfig",
],
"processing_altclip": ["AltCLIPProcessor"],
}
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_altclip"] = [
"AltCLIPPreTrainedModel",
"AltCLIPModel",
"AltCLIPTextModel",
"AltCLIPVisionModel",
]
from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure
if TYPE_CHECKING:
from .configuration_altclip import (
AltCLIPConfig,
AltCLIPTextConfig,
AltCLIPVisionConfig,
)
from .processing_altclip import AltCLIPProcessor
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_altclip import (
AltCLIPModel,
AltCLIPPreTrainedModel,
AltCLIPTextModel,
AltCLIPVisionModel,
)
from .configuration_altclip import *
from .modeling_altclip import *
from .processing_altclip import *
else:
import sys
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
_file = globals()["__file__"]
sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)

View File

@ -398,3 +398,6 @@ class AltCLIPConfig(PretrainedConfig):
"""
return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs)
__all__ = ["AltCLIPTextConfig", "AltCLIPVisionConfig", "AltCLIPConfig"]

View File

@ -1694,3 +1694,6 @@ def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_l
mask = input_ids.ne(padding_idx).int()
incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
return incremental_indices.long() + padding_idx
__all__ = ["AltCLIPPreTrainedModel", "AltCLIPVisionModel", "AltCLIPTextModel", "AltCLIPModel"]

View File

@ -130,3 +130,6 @@ class AltCLIPProcessor(ProcessorMixin):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
__all__ = ["AltCLIPProcessor"]

View File

@ -1262,13 +1262,6 @@ class BertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class BertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class BertLMHeadModel(metaclass=DummyObject):
_backends = ["torch"]
@ -1368,13 +1361,6 @@ class BigBirdForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class BigBirdLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class BigBirdModel(metaclass=DummyObject):
_backends = ["torch"]
@ -1862,13 +1848,6 @@ class CanineForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class CanineLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class CanineModel(metaclass=DummyObject):
_backends = ["torch"]
@ -2230,13 +2209,6 @@ class ConvBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class ConvBertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class ConvBertModel(metaclass=DummyObject):
_backends = ["torch"]
@ -3144,13 +3116,6 @@ class QDQBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class QDQBertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class QDQBertLMHeadModel(metaclass=DummyObject):
_backends = ["torch"]
@ -4133,13 +4098,6 @@ class FNetForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class FNetLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class FNetModel(metaclass=DummyObject):
_backends = ["torch"]
@ -4572,13 +4530,6 @@ class GPTNeoXForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class GPTNeoXLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class GPTNeoXModel(metaclass=DummyObject):
_backends = ["torch"]
@ -4600,13 +4551,6 @@ class GPTNeoXJapaneseForCausalLM(metaclass=DummyObject):
requires_backends(self, ["torch"])
class GPTNeoXJapaneseLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class GPTNeoXJapaneseModel(metaclass=DummyObject):
_backends = ["torch"]
@ -5437,13 +5381,6 @@ class LongformerPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["torch"])
class LongformerSelfAttention(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class LongT5EncoderModel(metaclass=DummyObject):
_backends = ["torch"]
@ -5584,13 +5521,6 @@ class LxmertVisualFeatureEncoder(metaclass=DummyObject):
requires_backends(self, ["torch"])
class LxmertXLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class M2M100ForConditionalGeneration(metaclass=DummyObject):
_backends = ["torch"]
@ -5675,6 +5605,13 @@ class MarianMTModel(metaclass=DummyObject):
requires_backends(self, ["torch"])
class MarianPreTrainedModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class MarkupLMForQuestionAnswering(metaclass=DummyObject):
_backends = ["torch"]
@ -6011,13 +5948,6 @@ class MobileBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class MobileBertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class MobileBertModel(metaclass=DummyObject):
_backends = ["torch"]
@ -6184,13 +6114,6 @@ class MPNetForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class MPNetLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class MPNetModel(metaclass=DummyObject):
_backends = ["torch"]
@ -6562,13 +6485,6 @@ class NystromformerForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class NystromformerLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class NystromformerModel(metaclass=DummyObject):
_backends = ["torch"]
@ -6993,13 +6909,6 @@ class PerceiverForSequenceClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class PerceiverLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class PerceiverModel(metaclass=DummyObject):
_backends = ["torch"]
@ -7469,13 +7378,6 @@ class RecurrentGemmaPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["torch"])
class ReformerAttention(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class ReformerForMaskedLM(metaclass=DummyObject):
_backends = ["torch"]
@ -7497,13 +7399,6 @@ class ReformerForSequenceClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class ReformerLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class ReformerModel(metaclass=DummyObject):
_backends = ["torch"]
@ -7588,13 +7483,6 @@ class RemBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class RemBertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class RemBertModel(metaclass=DummyObject):
_backends = ["torch"]
@ -7802,13 +7690,6 @@ class RoCBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class RoCBertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class RoCBertModel(metaclass=DummyObject):
_backends = ["torch"]
@ -7869,13 +7750,6 @@ class RoFormerForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class RoFormerLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class RoFormerModel(metaclass=DummyObject):
_backends = ["torch"]
@ -8097,13 +7971,6 @@ class SegformerForSemanticSegmentation(metaclass=DummyObject):
requires_backends(self, ["torch"])
class SegformerLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class SegformerModel(metaclass=DummyObject):
_backends = ["torch"]
@ -8314,13 +8181,6 @@ class SplinterForQuestionAnswering(metaclass=DummyObject):
requires_backends(self, ["torch"])
class SplinterLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class SplinterModel(metaclass=DummyObject):
_backends = ["torch"]
@ -8377,13 +8237,6 @@ class SqueezeBertModel(metaclass=DummyObject):
requires_backends(self, ["torch"])
class SqueezeBertModule(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class SqueezeBertPreTrainedModel(metaclass=DummyObject):
_backends = ["torch"]
@ -9092,13 +8945,6 @@ class ViltForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class ViltLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class ViltModel(metaclass=DummyObject):
_backends = ["torch"]
@ -9176,13 +9022,6 @@ class VisualBertForVisualReasoning(metaclass=DummyObject):
requires_backends(self, ["torch"])
class VisualBertLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class VisualBertModel(metaclass=DummyObject):
_backends = ["torch"]
@ -9232,13 +9071,6 @@ class ViTMAEForPreTraining(metaclass=DummyObject):
requires_backends(self, ["torch"])
class ViTMAELayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class ViTMAEModel(metaclass=DummyObject):
_backends = ["torch"]
@ -9957,13 +9789,6 @@ class YosoForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"])
class YosoLayer(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class YosoModel(metaclass=DummyObject):
_backends = ["torch"]

View File

@ -128,14 +128,14 @@ class MarianTokenizer(metaclass=DummyObject):
requires_backends(self, ["sentencepiece"])
class MBart50Tokenizer(metaclass=DummyObject):
class MBartTokenizer(metaclass=DummyObject):
_backends = ["sentencepiece"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["sentencepiece"])
class MBartTokenizer(metaclass=DummyObject):
class MBart50Tokenizer(metaclass=DummyObject):
_backends = ["sentencepiece"]
def __init__(self, *args, **kwargs):

View File

@ -478,13 +478,6 @@ class TFBartPretrainedModel(metaclass=DummyObject):
requires_backends(self, ["tf"])
class TFBertEmbeddings(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFBertForMaskedLM(metaclass=DummyObject):
_backends = ["tf"]
@ -772,13 +765,6 @@ class TFConvBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["tf"])
class TFConvBertLayer(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFConvBertModel(metaclass=DummyObject):
_backends = ["tf"]
@ -1717,13 +1703,6 @@ class TFLongformerPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["tf"])
class TFLongformerSelfAttention(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFLxmertForPreTraining(metaclass=DummyObject):
_backends = ["tf"]
@ -2179,13 +2158,6 @@ class TFRemBertForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["tf"])
class TFRemBertLayer(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFRemBertModel(metaclass=DummyObject):
_backends = ["tf"]
@ -2389,13 +2361,6 @@ class TFRoFormerForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["tf"])
class TFRoFormerLayer(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFRoFormerModel(metaclass=DummyObject):
_backends = ["tf"]

View File

@ -15,6 +15,7 @@
Import utilities: Utilities related to imports and our lazy inits.
"""
import importlib.machinery
import importlib.metadata
import importlib.util
import json
@ -27,7 +28,7 @@ from collections import OrderedDict
from functools import lru_cache
from itertools import chain
from types import ModuleType
from typing import Any, Optional, Tuple, Union
from typing import Any, Dict, FrozenSet, Optional, Set, Tuple, Union
from packaging import version
@ -1386,6 +1387,11 @@ explained here: https://www.tensorflow.org/text/guide/tf_text_intro.
Please note that you may need to restart your runtime after installation.
"""
# docstyle-ignore
TORCHAUDIO_IMPORT_ERROR = """
{0} requires the torchaudio library but it was not found in your environment. Please install it and restart your
runtime.
"""
# docstyle-ignore
PANDAS_IMPORT_ERROR = """
@ -1561,6 +1567,7 @@ BACKENDS_MAPPING = OrderedDict(
("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
("tensorflow_text", (is_tensorflow_text_available, TENSORFLOW_TEXT_IMPORT_ERROR)),
("timm", (is_timm_available, TIMM_IMPORT_ERROR)),
("torchaudio", (is_torchaudio_available, TORCHAUDIO_IMPORT_ERROR)),
("natten", (is_natten_available, NATTEN_IMPORT_ERROR)),
("nltk", (is_nltk_available, NLTK_IMPORT_ERROR)),
("tokenizers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
@ -1619,6 +1626,10 @@ def is_torch_fx_proxy(x):
return False
BACKENDS_T = FrozenSet[str]
IMPORT_STRUCTURE_T = Dict[BACKENDS_T, Dict[str, Set[str]]]
class _LazyModule(ModuleType):
"""
Module class that surfaces all objects but only performs associated imports when the objects are requested.
@ -1626,21 +1637,71 @@ class _LazyModule(ModuleType):
# Very heavily inspired by optuna.integration._IntegrationModule
# https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py
def __init__(self, name, module_file, import_structure, module_spec=None, extra_objects=None):
def __init__(
self,
name: str,
module_file: str,
import_structure: IMPORT_STRUCTURE_T,
module_spec: importlib.machinery.ModuleSpec = None,
extra_objects: Dict[str, object] = None,
):
super().__init__(name)
self._modules = set(import_structure.keys())
self._class_to_module = {}
for key, values in import_structure.items():
for value in values:
self._class_to_module[value] = key
# Needed for autocompletion in an IDE
self.__all__ = list(import_structure.keys()) + list(chain(*import_structure.values()))
self.__file__ = module_file
self.__spec__ = module_spec
self.__path__ = [os.path.dirname(module_file)]
self._objects = {} if extra_objects is None else extra_objects
self._name = name
self._import_structure = import_structure
self._object_missing_backend = {}
if any(isinstance(key, frozenset) for key in import_structure.keys()):
self._modules = set()
self._class_to_module = {}
self.__all__ = []
_import_structure = {}
for backends, module in import_structure.items():
missing_backends = []
for backend in backends:
if backend not in BACKENDS_MAPPING:
raise ValueError(
f"The backend '{backend}' was specified around object {module} but isn't defined in the backends mapping."
)
callable, error = BACKENDS_MAPPING[backend]
if not callable():
missing_backends.append(backend)
self._modules = self._modules.union(set(module.keys()))
for key, values in module.items():
if len(missing_backends):
self._object_missing_backend[key] = missing_backends
for value in values:
self._class_to_module[value] = key
if len(missing_backends):
self._object_missing_backend[value] = missing_backends
_import_structure.setdefault(key, []).extend(values)
# Needed for autocompletion in an IDE
self.__all__.extend(list(module.keys()) + list(chain(*module.values())))
self.__file__ = module_file
self.__spec__ = module_spec
self.__path__ = [os.path.dirname(module_file)]
self._objects = {} if extra_objects is None else extra_objects
self._name = name
self._import_structure = _import_structure
# This can be removed once every exportable object has an `export()` decorator.
else:
self._modules = set(import_structure.keys())
self._class_to_module = {}
for key, values in import_structure.items():
for value in values:
self._class_to_module[value] = key
# Needed for autocompletion in an IDE
self.__all__ = list(import_structure.keys()) + list(chain(*import_structure.values()))
self.__file__ = module_file
self.__spec__ = module_spec
self.__path__ = [os.path.dirname(module_file)]
self._objects = {} if extra_objects is None else extra_objects
self._name = name
self._import_structure = import_structure
# Needed for autocompletion in an IDE
def __dir__(self):
@ -1657,6 +1718,19 @@ class _LazyModule(ModuleType):
return self._objects[name]
if name in self._modules:
value = self._get_module(name)
elif name in self._object_missing_backend.keys():
missing_backends = self._object_missing_backend[name]
class Placeholder(metaclass=DummyObject):
_backends = missing_backends
def __init__(self, *args, **kwargs):
requires_backends(self, missing_backends)
Placeholder.__name__ = name
Placeholder.__module__ = self._name  # the module name (a string), not the ModuleSpec
value = Placeholder
elif name in self._class_to_module.keys():
module = self._get_module(self._class_to_module[name])
value = getattr(module, name)
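# A sketch of the resulting behavior (assuming torch is absent from the environment):
#
#   from transformers.models.albert import AlbertModel  # succeeds: returns a Placeholder
#   AlbertModel()  # raises, telling the user the "torch" backend is missing
#
# This mirrors what the generated dummy objects do, without writing them out by hand.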
@ -1700,3 +1774,385 @@ def direct_transformers_import(path: str, file="__init__.py") -> ModuleType:
spec.loader.exec_module(module)
module = sys.modules[name]
return module
def export(*, backends=()):
"""
This decorator enables two things:
- Attaching a `__backends` tuple to an object, so that the backends it needs in order to
execute correctly can be inspected without instantiating it
- Marking objects for dynamic import: files are parsed for the '@export' string when
building the import structure
"""
if not isinstance(backends, tuple):
raise ValueError("Backends should be a tuple.")
for backend in backends:
if backend not in BACKENDS_MAPPING:
raise ValueError(f"Backend should be defined in the BACKENDS_MAPPING. Offending backend: {backend}")
def inner_fn(fun):
fun.__backends = backends
return fun
return inner_fn
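# Usage sketch (MyTokenizer is illustrative, not part of this diff):
#
#   @export(backends=("sentencepiece",))
#   class MyTokenizer(PreTrainedTokenizer):
#       ...
#
#   @export(backends=("not_a_backend",))  # raises ValueError at import time
#   class Broken:
#       ...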
BASE_FILE_REQUIREMENTS = {
lambda e: "modeling_tf_" in e: ("tf",),
lambda e: "modeling_flax_" in e: ("flax",),
lambda e: "modeling_" in e: ("torch",),
lambda e: e.startswith("tokenization_") and e.endswith("_fast"): ("tokenizers",),
}
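# Note: the checks run in order and the first match wins. "modeling_tf_albert" is
# caught by the "modeling_tf_" check before the generic "modeling_" one, so it gets
# a "tf" default rather than "torch"; "tokenization_albert" matches nothing and
# carries no default backend.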
def fetch__all__(file_content):
"""
Returns the content of the __all__ variable in the file content.
Returns an empty list if it is not defined, otherwise returns a list of strings.
"""
if "__all__" not in file_content:
return []
lines = file_content.splitlines()
for index, line in enumerate(lines):
if line.startswith("__all__"):
start_index = index
lines = lines[start_index:]
if not lines[0].startswith("__all__"):
raise ValueError(
"fetch__all__ accepts a list of lines, with the first line being the __all__ variable declaration"
)
# __all__ is defined on a single line
if lines[0].endswith("]"):
return [obj.strip("\"' ") for obj in lines[0].split("=")[1].strip(" []").split(",")]
# __all__ is defined on multiple lines
else:
_all = []
for __all__line_index in range(1, len(lines)):
if lines[__all__line_index].strip() == "]":
return _all
else:
_all.append(lines[__all__line_index].strip("\"', "))
return _all
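# Example (a sketch):
#
#   fetch__all__('__all__ = ["AlbertConfig", "AlbertOnnxConfig"]')
#   # -> ['AlbertConfig', 'AlbertOnnxConfig']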
@lru_cache()
def create_import_structure_from_path(module_path):
"""
This method takes the path to a file/a folder and returns the import structure.
If a file is given, it will return the import structure of the parent folder.
Import structures are designed to be digestible by `_LazyModule` objects. They are
created from the __all__ definitions in each file as well as the `@export` decorators
above methods and objects.
The import structure allows explicit display of the required backends for a given object.
These backends are specified in two ways:
1. Through their `@export`, if they are exported with that decorator. This `@export` decorator
accepts a `backends` tuple kwarg mentioning which backends are required to run this object.
2. If an object is defined in a file with "default" backends, it will have, at a minimum, those
backends specified. The default backends are defined according to the filename:
- If a file is named like `modeling_*.py`, it will have a `torch` backend
- If a file is named like `modeling_tf_*.py`, it will have a `tf` backend
- If a file is named like `modeling_flax_*.py`, it will have a `flax` backend
- If a file is named like `tokenization_*_fast.py`, it will have a `tokenizers` backend
Backends serve the purpose of displaying a clear error message to the user in case the backends are not installed.
Should an object be imported without its required backends being in the environment, any attempt to use the
object will raise an error mentioning which backend(s) should be added to the environment in order to use
that object.
Here's an example of an input import structure at the src.transformers.models level:
{
'albert': {
frozenset(): {
'configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'}
},
frozenset({'tokenizers'}): {
'tokenization_albert_fast': {'AlbertTokenizerFast'}
},
},
'align': {
frozenset(): {
'configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
'processing_align': {'AlignProcessor'}
},
},
'altclip': {
frozenset(): {
'configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
'processing_altclip': {'AltCLIPProcessor'},
}
}
}
"""
import_structure = {}
if os.path.isdir(module_path):
directory = module_path
adjacent_modules = []
for f in os.listdir(module_path):
if f != "__pycache__" and os.path.isdir(os.path.join(module_path, f)):
import_structure[f] = create_import_structure_from_path(os.path.join(module_path, f))
elif not os.path.isdir(os.path.join(directory, f)):
adjacent_modules.append(f)
else:
directory = os.path.dirname(module_path)
adjacent_modules = [f for f in os.listdir(directory) if not os.path.isdir(os.path.join(directory, f))]
# We're only taking a look at files different from __init__.py
# We could theoretically export things directly from the __init__.py
# files, but this is not supported at this time.
if "__init__.py" in adjacent_modules:
adjacent_modules.remove("__init__.py")
module_requirements = {}
for module_name in adjacent_modules:
# Only modules ending in `.py` are accepted here.
if not module_name.endswith(".py"):
continue
with open(os.path.join(directory, module_name)) as f:
file_content = f.read()
# Remove the .py suffix
module_name = module_name[:-3]
previous_line = ""
previous_index = 0
# Some files have some requirements by default.
# For example, any file named `modeling_tf_xxx.py`
# should have TensorFlow as a required backend.
base_requirements = ()
for string_check, requirements in BASE_FILE_REQUIREMENTS.items():
if string_check(module_name):
base_requirements = requirements
break
# Objects that have a `@export` assigned to them will get exported
# with the backends specified in the decorator as well as the file backends.
exported_objects = set()
if "@export" in file_content:
lines = file_content.split("\n")
for index, line in enumerate(lines):
# This allows exporting items with other decorators. We'll take a look
# at the line that follows at the same indentation level.
if line.startswith((" ", "\t", "@", ")")) and not line.startswith("@export"):
continue
# Skipping a line enables putting whatever we want between the
# export() call and the actual class/method definition.
# This is what enables having # Copied from statements, docs, etc.
skip_line = False
if "@export" in previous_line:
skip_line = False
# Backends are defined on the same line as export
if "backends" in previous_line:
backends_string = previous_line.split("backends=")[1].split("(")[1].split(")")[0]
backends = tuple(sorted([b.strip("'\",") for b in backends_string.split(", ") if b]))
# Backends are defined in the lines following export, for example such as:
# @export(
# backends=(
# "sentencepiece",
# "torch",
# "tf",
# )
# )
#
# or
#
# @export(
# backends=(
# "sentencepiece", "tf"
# )
# )
elif "backends" in lines[previous_index + 1]:
backends = []
for backend_line in lines[previous_index:index]:
if "backends" in backend_line:
backend_line = backend_line.split("=")[1]
if '"' in backend_line or "'" in backend_line:
if ", " in backend_line:
backends.extend(backend.strip("()\"', ") for backend in backend_line.split(", "))
else:
backends.append(backend_line.strip("()\"', "))
# If the line is only a ')', then we reached the end of the backends and we break.
if backend_line.strip() == ")":
break
backends = tuple(backends)
# No backends are registered for export
else:
backends = ()
backends = frozenset(backends + base_requirements)
if backends not in module_requirements:
module_requirements[backends] = {}
if module_name not in module_requirements[backends]:
module_requirements[backends][module_name] = set()
if not line.startswith("class") and not line.startswith("def"):
skip_line = True
else:
start_index = 6 if line.startswith("class") else 4
object_name = line[start_index:].split("(")[0].strip(":")
module_requirements[backends][module_name].add(object_name)
exported_objects.add(object_name)
if not skip_line:
previous_line = line
previous_index = index
# All objects that are in __all__ should be exported by default.
# These objects are exported with the file backends.
if "__all__" in file_content:
for _all_object in fetch__all__(file_content):
if _all_object not in exported_objects:
backends = frozenset(base_requirements)
if backends not in module_requirements:
module_requirements[backends] = {}
if module_name not in module_requirements[backends]:
module_requirements[backends][module_name] = set()
module_requirements[backends][module_name].add(_all_object)
import_structure = {**module_requirements, **import_structure}
return import_structure
def spread_import_structure(nested_import_structure):
"""
This method takes as input an unordered import structure and brings the required backends at the top-level,
aggregating modules and objects under their required backends.
Here's an example of an input import structure at the src.transformers.models level:
{
'albert': {
frozenset(): {
'configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'}
},
frozenset({'tokenizers'}): {
'tokenization_albert_fast': {'AlbertTokenizerFast'}
},
},
'align': {
frozenset(): {
'configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
'processing_align': {'AlignProcessor'}
},
},
'altclip': {
frozenset(): {
'configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
'processing_altclip': {'AltCLIPProcessor'},
}
}
}
Here's an example of an output import structure at the src.transformers.models level:
{
frozenset({'tokenizers'}): {
'albert.tokenization_albert_fast': {'AlbertTokenizerFast'}
},
frozenset(): {
'albert.configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'},
'align.processing_align': {'AlignProcessor'},
'align.configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
'altclip.configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
'altclip.processing_altclip': {'AltCLIPProcessor'}
}
}
"""
def propagate_frozenset(unordered_import_structure):
tuple_first_import_structure = {}
for _key, _value in unordered_import_structure.items():
if not isinstance(_value, dict):
tuple_first_import_structure[_key] = _value
elif any(isinstance(v, frozenset) for v in _value.keys()):
# Here we want to switch around key and v
for k, v in _value.items():
if isinstance(k, frozenset):
if k not in tuple_first_import_structure:
tuple_first_import_structure[k] = {}
tuple_first_import_structure[k][_key] = v
else:
tuple_first_import_structure[_key] = propagate_frozenset(_value)
return tuple_first_import_structure
def flatten_dict(_dict, previous_key=None):
items = []
for _key, _value in _dict.items():
_key = f"{previous_key}.{_key}" if previous_key is not None else _key
if isinstance(_value, dict):
items.extend(flatten_dict(_value, _key).items())
else:
items.append((_key, _value))
return dict(items)
# The tuples contain the necessary backends. We want these first, so we propagate them up the
# import structure.
ordered_import_structure = nested_import_structure
# 6 is a number that gives us sufficient depth to go through all files and foreseeable folder depths
# while not taking too long to parse.
for i in range(6):
ordered_import_structure = propagate_frozenset(ordered_import_structure)
# We then flatten the dict so that it references a module path.
flattened_import_structure = {}
for key, value in ordered_import_structure.copy().items():
if isinstance(key, str):
del ordered_import_structure[key]
else:
flattened_import_structure[key] = flatten_dict(value)
return flattened_import_structure
def define_import_structure(module_path: str) -> IMPORT_STRUCTURE_T:
"""
This method takes a module_path as input and creates an import structure digestible by a _LazyModule.
Here's an example of an output import structure at the src.transformers.models level:
{
frozenset({'tokenizers'}): {
'albert.tokenization_albert_fast': {'AlbertTokenizerFast'}
},
frozenset(): {
'albert.configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'},
'align.processing_align': {'AlignProcessor'},
'align.configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
'altclip.configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
'altclip.processing_altclip': {'AltCLIPProcessor'}
}
}
The import structure is a dict whose keys are frozensets of required backends and whose values map module paths to sets of object names.
"""
import_structure = create_import_structure_from_path(module_path)
return spread_import_structure(import_structure)
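
Taken together, a per-model `__init__.py` now only needs the two registration lines shown in the albert, align, and altclip files above. A quick way to inspect the structure this produces (the path is illustrative):

from transformers.utils.import_utils import define_import_structure

structure = define_import_structure("src/transformers/models/albert/__init__.py")
# Keys are frozensets of required backends; values map module names to sets of object names.
for backends, modules in structure.items():
    print(sorted(backends), {module: sorted(objects) for module, objects in modules.items()})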

View File

@ -34,8 +34,8 @@ if is_torch_available():
LongformerForSequenceClassification,
LongformerForTokenClassification,
LongformerModel,
LongformerSelfAttention,
)
from transformers.models.longformer.modeling_longformer import LongformerSelfAttention
class LongformerModelTester:

View File

@ -37,8 +37,8 @@ if is_tf_available():
TFLongformerForSequenceClassification,
TFLongformerForTokenClassification,
TFLongformerModel,
TFLongformerSelfAttention,
)
from transformers.models.longformer.modeling_tf_longformer import TFLongformerSelfAttention
from transformers.tf_utils import shape_list

View File

@ -40,11 +40,11 @@ if is_torch_available():
ReformerForMaskedLM,
ReformerForQuestionAnswering,
ReformerForSequenceClassification,
ReformerLayer,
ReformerModel,
ReformerModelWithLMHead,
ReformerTokenizer,
)
from transformers.models.reformer.modeling_reformer import ReformerLayer
class ReformerModelTester:

View File

@ -0,0 +1,23 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# fmt: off
from transformers.utils.import_utils import export
@export(backends=("random_item_that_should_not_exist",))
class A0:
def __init__(self):
pass

View File

@ -0,0 +1,80 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# fmt: off
from transformers.utils.import_utils import export
@export()
class A0:
def __init__(self):
pass
@export()
def a0():
pass
@export(backends=("torch", "tf"))
class A1:
def __init__(self):
pass
@export(backends=("torch", "tf"))
def a1():
pass
@export(
backends=("torch", "tf")
)
class A2:
def __init__(self):
pass
@export(
backends=("torch", "tf")
)
def a2():
pass
@export(
backends=(
"torch",
"tf"
)
)
class A3:
def __init__(self):
pass
@export(
backends=(
"torch",
"tf"
)
)
def a3():
pass
@export(backends=())
class A4:
def __init__(self):
pass

View File

@ -0,0 +1,79 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# fmt: off
from transformers.utils.import_utils import export
@export()
# That's a statement
class B0:
def __init__(self):
pass
@export()
# That's a statement
def b0():
pass
@export(backends=("torch", "tf"))
# That's a statement
class B1:
def __init__(self):
pass
@export(backends=("torch", "tf"))
# That's a statement
def b1():
pass
@export(backends=("torch", "tf"))
# That's a statement
class B2:
def __init__(self):
pass
@export(backends=("torch", "tf"))
# That's a statement
def b2():
pass
@export(
backends=(
"torch",
"tf"
)
)
# That's a statement
class B3:
def __init__(self):
pass
@export(
backends=(
"torch",
"tf"
)
)
# That's a statement
def b3():
pass

View File

@ -0,0 +1,77 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# fmt: off
from transformers.utils.import_utils import export
@export(backends=("torch", "torch"))
class C0:
def __init__(self):
pass
@export(backends=("torch", "torch"))
def c0():
pass
@export(backends=("torch", "torch"))
# That's a statement
class C1:
def __init__(self):
pass
@export(backends=("torch", "torch"))
# That's a statement
def c1():
pass
@export(backends=("torch", "torch"))
# That's a statement
class C2:
def __init__(self):
pass
@export(backends=("torch", "torch"))
# That's a statement
def c2():
pass
@export(
backends=(
"torch",
"torch"
)
)
# That's a statement
class C3:
def __init__(self):
pass
@export(
backends=(
"torch",
"torch"
)
)
# That's a statement
def c3():
pass

View File

@ -0,0 +1,98 @@
import os
import unittest
from pathlib import Path
from transformers.utils.import_utils import define_import_structure, export, spread_import_structure
import_structures = Path("import_structures")
def fetch__all__(file_content):
"""
Returns the content of the __all__ variable in the file content.
Returns None if not defined, otherwise returns a list of strings.
"""
lines = file_content.split("\n")
for line_index in range(len(lines)):
line = lines[line_index]
if line.startswith("__all__ = "):
# __all__ is defined on a single line
if line.endswith("]"):
return [obj.strip("\"' ") for obj in line.split("=")[1].strip(" []").split(",")]
# __all__ is defined on multiple lines
else:
_all = []
for __all__line_index in range(line_index + 1, len(lines)):
if lines[__all__line_index].strip() == "]":
return _all
else:
_all.append(lines[__all__line_index].strip("\"', "))
class TestImportStructures(unittest.TestCase):
base_transformers_path = Path(__file__).parent.parent.parent
models_path = base_transformers_path / "src" / "transformers" / "models"
models_import_structure = spread_import_structure(define_import_structure(models_path))
def test_definition(self):
import_structure = define_import_structure(import_structures)
import_structure_definition = {
frozenset(()): {
"import_structure_raw_register": {"A0", "a0", "A4"},
"import_structure_register_with_comments": {"B0", "b0"},
},
frozenset(("tf", "torch")): {
"import_structure_raw_register": {"A1", "a1", "A2", "a2", "A3", "a3"},
"import_structure_register_with_comments": {"B1", "b1", "B2", "b2", "B3", "b3"},
},
frozenset(("torch",)): {
"import_structure_register_with_duplicates": {"C0", "c0", "C1", "c1", "C2", "c2", "C3", "c3"},
},
}
self.assertDictEqual(import_structure, import_structure_definition)
def test_transformers_specific_model_import(self):
"""
This test ensures that there is equivalence between what is written down in __all__ and what is
written down with export().
It doesn't test the backends attributed to export().
"""
for architecture in os.listdir(self.models_path):
if (
os.path.isfile(self.models_path / architecture)
or architecture.startswith("_")
or architecture == "deprecated"
):
continue
with self.subTest(f"Testing arch {architecture}"):
import_structure = define_import_structure(self.models_path / architecture)
backend_agnostic_import_structure = {}
for requirement, module_object_mapping in import_structure.items():
for module, objects in module_object_mapping.items():
if module not in backend_agnostic_import_structure:
backend_agnostic_import_structure[module] = []
backend_agnostic_import_structure[module].extend(objects)
for module, objects in backend_agnostic_import_structure.items():
with open(self.models_path / architecture / f"{module}.py") as f:
content = f.read()
_all = fetch__all__(content)
if _all is None:
raise ValueError(f"{module} doesn't have __all__ defined.")
error_message = (
f"self.models_path / architecture / f'{module}.py doesn't seem to be defined correctly:\n"
f"Defined in __all__: {sorted(_all)}\nDefined with register: {sorted(objects)}"
)
self.assertListEqual(sorted(objects), sorted(_all), msg=error_message)
def test_export_backend_should_be_defined(self):
with self.assertRaisesRegex(ValueError, "Backend should be defined in the BACKENDS_MAPPING"):
# Mirrors the fixture above that exports with an undefined backend.
@export(backends=("random_item_that_should_not_exist",))
class UndefinedBackendClass:
pass

View File

@ -244,7 +244,7 @@ def sort_imports(file: str, check_only: bool = True):
code = f.read()
# If the file is not a custom init, there is nothing to do.
if "_import_structure" not in code:
if "_import_structure" not in code or "define_import_structure" in code:
return
# Blocks of indent level 0