Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-03 12:50:06 +06:00)
Import structure & first three model refactors (#31329)
* Import structure & first three model refactors

* Register -> Export. Export all in __all__. Sensible defaults according to filename.

* Apply most comments from Amy and some comments from Lucain

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Lucain Pouget <lucainp@gmail.com>

* Style

* Add comment

* Clearer .py management

* Raise if not in backend mapping

* More specific type

* More efficient listdir

* Misc fixes

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Lucain Pouget <lucainp@gmail.com>
This commit is contained in:
parent 7f112caac2
commit f24f084329
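In short: instead of each model package hand-maintaining an `_import_structure` dict, public objects are now tagged in the model files themselves (via an `export` decorator and `__all__`), and the package `__init__.py` derives the lazy import structure from its own files. Below is a minimal sketch of the pattern, reconstructed from the albert changes in this diff; `FooTokenizer` and `tokenization_foo.py` are hypothetical stand-ins, not part of the commit.

# tokenization_foo.py — hypothetical model file following the new convention
from ...utils.import_utils import export


# `export` marks the class as public and records which backends it needs.
# Per the commit message ("Sensible defaults according to filename"), the
# backend defaults from the filename, so the decorator only appears for
# non-default backends such as sentencepiece.
@export(backends=("sentencepiece",))
class FooTokenizer:
    ...


# The names that define_import_structure picks up for lazy import.
__all__ = ["FooTokenizer"]


# __init__.py — the backend try/except boilerplate collapses to this
from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure

if TYPE_CHECKING:
    from .tokenization_foo import *
else:
    import sys

    _file = globals()["__file__"]
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)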
Makefile (2 lines changed)
@@ -53,7 +53,6 @@ quality:
 	@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
 	ruff check $(check_dirs) setup.py conftest.py
 	ruff format --check $(check_dirs) setup.py conftest.py
-	python utils/custom_init_isort.py --check_only
 	python utils/sort_auto_mappings.py --check_only
 	python utils/check_doc_toc.py
 	python utils/check_docstrings.py --check_all
@@ -62,7 +61,6 @@ quality:
 # Format source code automatically and check is there are any problems left that need manual fixing

 extra_style_checks:
-	python utils/custom_init_isort.py
 	python utils/sort_auto_mappings.py
 	python utils/check_doc_toc.py --fix_and_overwrite

src/transformers/__init__.py
@@ -1500,7 +1500,6 @@ else:
             "BertForQuestionAnswering",
             "BertForSequenceClassification",
             "BertForTokenClassification",
-            "BertLayer",
             "BertLMHeadModel",
             "BertModel",
             "BertPreTrainedModel",
@@ -1524,7 +1523,6 @@ else:
             "BigBirdForQuestionAnswering",
             "BigBirdForSequenceClassification",
             "BigBirdForTokenClassification",
-            "BigBirdLayer",
             "BigBirdModel",
             "BigBirdPreTrainedModel",
             "load_tf_weights_in_big_bird",
@@ -1643,7 +1641,6 @@ else:
             "CanineForQuestionAnswering",
             "CanineForSequenceClassification",
             "CanineForTokenClassification",
-            "CanineLayer",
             "CanineModel",
             "CaninePreTrainedModel",
             "load_tf_weights_in_canine",
@@ -1730,7 +1727,6 @@ else:
             "ConvBertForQuestionAnswering",
             "ConvBertForSequenceClassification",
             "ConvBertForTokenClassification",
-            "ConvBertLayer",
             "ConvBertModel",
             "ConvBertPreTrainedModel",
             "load_tf_weights_in_convbert",
@@ -1959,7 +1955,6 @@ else:
             "QDQBertForQuestionAnswering",
             "QDQBertForSequenceClassification",
             "QDQBertForTokenClassification",
-            "QDQBertLayer",
             "QDQBertLMHeadModel",
             "QDQBertModel",
             "QDQBertPreTrainedModel",
@@ -2211,7 +2206,6 @@ else:
             "FNetForQuestionAnswering",
             "FNetForSequenceClassification",
             "FNetForTokenClassification",
-            "FNetLayer",
             "FNetModel",
             "FNetPreTrainedModel",
         ]
@@ -2312,7 +2306,6 @@ else:
             "GPTNeoXForQuestionAnswering",
             "GPTNeoXForSequenceClassification",
             "GPTNeoXForTokenClassification",
-            "GPTNeoXLayer",
             "GPTNeoXModel",
             "GPTNeoXPreTrainedModel",
         ]
@@ -2320,7 +2313,6 @@ else:
     _import_structure["models.gpt_neox_japanese"].extend(
         [
             "GPTNeoXJapaneseForCausalLM",
-            "GPTNeoXJapaneseLayer",
             "GPTNeoXJapaneseModel",
             "GPTNeoXJapanesePreTrainedModel",
         ]
@@ -2552,7 +2544,6 @@ else:
             "LongformerForTokenClassification",
             "LongformerModel",
             "LongformerPreTrainedModel",
-            "LongformerSelfAttention",
         ]
     )
     _import_structure["models.longt5"].extend(
@@ -2585,7 +2576,6 @@ else:
             "LxmertModel",
             "LxmertPreTrainedModel",
             "LxmertVisualFeatureEncoder",
-            "LxmertXLayer",
         ]
     )
     _import_structure["models.m2m_100"].extend(
@@ -2609,7 +2599,9 @@ else:
             "Mamba2PreTrainedModel",
         ]
     )
-    _import_structure["models.marian"].extend(["MarianForCausalLM", "MarianModel", "MarianMTModel"])
+    _import_structure["models.marian"].extend(
+        ["MarianForCausalLM", "MarianModel", "MarianMTModel", "MarianPreTrainedModel"]
+    )
     _import_structure["models.markuplm"].extend(
         [
             "MarkupLMForQuestionAnswering",
@@ -2692,7 +2684,6 @@ else:
             "MobileBertForQuestionAnswering",
             "MobileBertForSequenceClassification",
             "MobileBertForTokenClassification",
-            "MobileBertLayer",
             "MobileBertModel",
             "MobileBertPreTrainedModel",
             "load_tf_weights_in_mobilebert",
@@ -2738,7 +2729,6 @@ else:
             "MPNetForQuestionAnswering",
             "MPNetForSequenceClassification",
             "MPNetForTokenClassification",
-            "MPNetLayer",
             "MPNetModel",
             "MPNetPreTrainedModel",
         ]
@@ -2828,7 +2818,6 @@ else:
             "NystromformerForQuestionAnswering",
             "NystromformerForSequenceClassification",
             "NystromformerForTokenClassification",
-            "NystromformerLayer",
             "NystromformerModel",
             "NystromformerPreTrainedModel",
         ]
@@ -2942,7 +2931,6 @@ else:
             "PerceiverForMultimodalAutoencoding",
             "PerceiverForOpticalFlow",
             "PerceiverForSequenceClassification",
-            "PerceiverLayer",
             "PerceiverModel",
             "PerceiverPreTrainedModel",
         ]
@@ -3078,11 +3066,9 @@ else:
     )
     _import_structure["models.reformer"].extend(
         [
-            "ReformerAttention",
             "ReformerForMaskedLM",
             "ReformerForQuestionAnswering",
             "ReformerForSequenceClassification",
-            "ReformerLayer",
             "ReformerModel",
             "ReformerModelWithLMHead",
             "ReformerPreTrainedModel",
@@ -3103,7 +3089,6 @@ else:
             "RemBertForQuestionAnswering",
             "RemBertForSequenceClassification",
             "RemBertForTokenClassification",
-            "RemBertLayer",
             "RemBertModel",
             "RemBertPreTrainedModel",
             "load_tf_weights_in_rembert",
@@ -3150,7 +3135,6 @@ else:
             "RoCBertForQuestionAnswering",
             "RoCBertForSequenceClassification",
             "RoCBertForTokenClassification",
-            "RoCBertLayer",
             "RoCBertModel",
             "RoCBertPreTrainedModel",
             "load_tf_weights_in_roc_bert",
@@ -3164,7 +3148,6 @@ else:
             "RoFormerForQuestionAnswering",
             "RoFormerForSequenceClassification",
             "RoFormerForTokenClassification",
-            "RoFormerLayer",
             "RoFormerModel",
             "RoFormerPreTrainedModel",
             "load_tf_weights_in_roformer",
@@ -3221,7 +3204,6 @@ else:
             "SegformerDecodeHead",
             "SegformerForImageClassification",
             "SegformerForSemanticSegmentation",
-            "SegformerLayer",
             "SegformerModel",
             "SegformerPreTrainedModel",
         ]
@@ -3280,7 +3262,6 @@ else:
         [
             "SplinterForPreTraining",
             "SplinterForQuestionAnswering",
-            "SplinterLayer",
             "SplinterModel",
             "SplinterPreTrainedModel",
         ]
@@ -3293,7 +3274,6 @@ else:
             "SqueezeBertForSequenceClassification",
             "SqueezeBertForTokenClassification",
             "SqueezeBertModel",
-            "SqueezeBertModule",
             "SqueezeBertPreTrainedModel",
         ]
     )
@@ -3492,7 +3472,6 @@ else:
             "ViltForMaskedLM",
             "ViltForQuestionAnswering",
             "ViltForTokenClassification",
-            "ViltLayer",
             "ViltModel",
             "ViltPreTrainedModel",
         ]
@@ -3512,7 +3491,6 @@ else:
             "VisualBertForQuestionAnswering",
             "VisualBertForRegionToPhraseAlignment",
             "VisualBertForVisualReasoning",
-            "VisualBertLayer",
             "VisualBertModel",
             "VisualBertPreTrainedModel",
         ]
@@ -3528,7 +3506,6 @@ else:
     _import_structure["models.vit_mae"].extend(
         [
             "ViTMAEForPreTraining",
-            "ViTMAELayer",
             "ViTMAEModel",
             "ViTMAEPreTrainedModel",
         ]
@@ -3708,7 +3685,6 @@ else:
             "YosoForQuestionAnswering",
             "YosoForSequenceClassification",
             "YosoForTokenClassification",
-            "YosoLayer",
             "YosoModel",
             "YosoPreTrainedModel",
         ]
@@ -3855,7 +3831,6 @@ else:
     )
     _import_structure["models.bert"].extend(
         [
-            "TFBertEmbeddings",
             "TFBertForMaskedLM",
             "TFBertForMultipleChoice",
             "TFBertForNextSentencePrediction",
@@ -3921,7 +3896,6 @@ else:
             "TFConvBertForQuestionAnswering",
             "TFConvBertForSequenceClassification",
             "TFConvBertForTokenClassification",
-            "TFConvBertLayer",
             "TFConvBertModel",
             "TFConvBertPreTrainedModel",
         ]
@@ -4152,7 +4126,6 @@ else:
             "TFLongformerForTokenClassification",
             "TFLongformerModel",
             "TFLongformerPreTrainedModel",
-            "TFLongformerSelfAttention",
         ]
     )
     _import_structure["models.lxmert"].extend(
@@ -4253,7 +4226,6 @@ else:
             "TFRemBertForQuestionAnswering",
             "TFRemBertForSequenceClassification",
             "TFRemBertForTokenClassification",
-            "TFRemBertLayer",
             "TFRemBertModel",
             "TFRemBertPreTrainedModel",
         ]
@@ -4299,7 +4271,6 @@ else:
             "TFRoFormerForQuestionAnswering",
             "TFRoFormerForSequenceClassification",
             "TFRoFormerForTokenClassification",
-            "TFRoFormerLayer",
             "TFRoFormerModel",
             "TFRoFormerPreTrainedModel",
         ]
@@ -5829,7 +5800,8 @@ if TYPE_CHECKING:
         from .models.llama import LlamaTokenizer
         from .models.m2m_100 import M2M100Tokenizer
         from .models.marian import MarianTokenizer
-        from .models.mbart import MBart50Tokenizer, MBartTokenizer
+        from .models.mbart import MBartTokenizer
+        from .models.mbart50 import MBart50Tokenizer
         from .models.mluke import MLukeTokenizer
         from .models.mt5 import MT5Tokenizer
         from .models.nllb import NllbTokenizer
@@ -6300,7 +6272,6 @@ if TYPE_CHECKING:
             BertForQuestionAnswering,
             BertForSequenceClassification,
             BertForTokenClassification,
-            BertLayer,
             BertLMHeadModel,
             BertModel,
             BertPreTrainedModel,
@@ -6320,7 +6291,6 @@ if TYPE_CHECKING:
             BigBirdForQuestionAnswering,
             BigBirdForSequenceClassification,
             BigBirdForTokenClassification,
-            BigBirdLayer,
             BigBirdModel,
             BigBirdPreTrainedModel,
             load_tf_weights_in_big_bird,
@@ -6415,7 +6385,6 @@ if TYPE_CHECKING:
             CanineForQuestionAnswering,
             CanineForSequenceClassification,
             CanineForTokenClassification,
-            CanineLayer,
             CanineModel,
             CaninePreTrainedModel,
             load_tf_weights_in_canine,
@@ -6488,7 +6457,6 @@ if TYPE_CHECKING:
             ConvBertForQuestionAnswering,
             ConvBertForSequenceClassification,
             ConvBertForTokenClassification,
-            ConvBertLayer,
             ConvBertModel,
             ConvBertPreTrainedModel,
             load_tf_weights_in_convbert,
@@ -6673,7 +6641,6 @@ if TYPE_CHECKING:
             QDQBertForQuestionAnswering,
             QDQBertForSequenceClassification,
             QDQBertForTokenClassification,
-            QDQBertLayer,
             QDQBertLMHeadModel,
             QDQBertModel,
             QDQBertPreTrainedModel,
@@ -6872,7 +6839,6 @@ if TYPE_CHECKING:
             FNetForQuestionAnswering,
             FNetForSequenceClassification,
             FNetForTokenClassification,
-            FNetLayer,
             FNetModel,
             FNetPreTrainedModel,
         )
@@ -6960,13 +6926,11 @@ if TYPE_CHECKING:
             GPTNeoXForQuestionAnswering,
             GPTNeoXForSequenceClassification,
             GPTNeoXForTokenClassification,
-            GPTNeoXLayer,
             GPTNeoXModel,
             GPTNeoXPreTrainedModel,
         )
         from .models.gpt_neox_japanese import (
             GPTNeoXJapaneseForCausalLM,
-            GPTNeoXJapaneseLayer,
             GPTNeoXJapaneseModel,
             GPTNeoXJapanesePreTrainedModel,
         )
@@ -7142,7 +7106,6 @@ if TYPE_CHECKING:
             LongformerForTokenClassification,
             LongformerModel,
             LongformerPreTrainedModel,
-            LongformerSelfAttention,
         )
         from .models.longt5 import (
             LongT5EncoderModel,
@@ -7169,7 +7132,6 @@ if TYPE_CHECKING:
             LxmertModel,
             LxmertPreTrainedModel,
             LxmertVisualFeatureEncoder,
-            LxmertXLayer,
         )
         from .models.m2m_100 import (
             M2M100ForConditionalGeneration,
@@ -7186,7 +7148,7 @@ if TYPE_CHECKING:
             Mamba2Model,
             Mamba2PreTrainedModel,
         )
-        from .models.marian import MarianForCausalLM, MarianModel, MarianMTModel
+        from .models.marian import MarianForCausalLM, MarianModel, MarianMTModel, MarianPreTrainedModel
         from .models.markuplm import (
             MarkupLMForQuestionAnswering,
             MarkupLMForSequenceClassification,
@@ -7252,7 +7214,6 @@ if TYPE_CHECKING:
             MobileBertForQuestionAnswering,
             MobileBertForSequenceClassification,
             MobileBertForTokenClassification,
-            MobileBertLayer,
             MobileBertModel,
             MobileBertPreTrainedModel,
             load_tf_weights_in_mobilebert,
@@ -7288,7 +7249,6 @@ if TYPE_CHECKING:
             MPNetForQuestionAnswering,
             MPNetForSequenceClassification,
             MPNetForTokenClassification,
-            MPNetLayer,
             MPNetModel,
             MPNetPreTrainedModel,
         )
@@ -7360,7 +7320,6 @@ if TYPE_CHECKING:
             NystromformerForQuestionAnswering,
             NystromformerForSequenceClassification,
             NystromformerForTokenClassification,
-            NystromformerLayer,
             NystromformerModel,
             NystromformerPreTrainedModel,
         )
@@ -7448,7 +7407,6 @@ if TYPE_CHECKING:
             PerceiverForMultimodalAutoencoding,
             PerceiverForOpticalFlow,
             PerceiverForSequenceClassification,
-            PerceiverLayer,
             PerceiverModel,
             PerceiverPreTrainedModel,
         )
@@ -7550,11 +7508,9 @@ if TYPE_CHECKING:
             RecurrentGemmaPreTrainedModel,
         )
        from .models.reformer import (
-            ReformerAttention,
             ReformerForMaskedLM,
             ReformerForQuestionAnswering,
             ReformerForSequenceClassification,
-            ReformerLayer,
             ReformerModel,
             ReformerModelWithLMHead,
             ReformerPreTrainedModel,
@@ -7571,7 +7527,6 @@ if TYPE_CHECKING:
             RemBertForQuestionAnswering,
             RemBertForSequenceClassification,
             RemBertForTokenClassification,
-            RemBertLayer,
             RemBertModel,
             RemBertPreTrainedModel,
             load_tf_weights_in_rembert,
@@ -7610,7 +7565,6 @@ if TYPE_CHECKING:
             RoCBertForQuestionAnswering,
             RoCBertForSequenceClassification,
             RoCBertForTokenClassification,
-            RoCBertLayer,
             RoCBertModel,
             RoCBertPreTrainedModel,
             load_tf_weights_in_roc_bert,
@@ -7622,7 +7576,6 @@ if TYPE_CHECKING:
             RoFormerForQuestionAnswering,
             RoFormerForSequenceClassification,
             RoFormerForTokenClassification,
-            RoFormerLayer,
             RoFormerModel,
             RoFormerPreTrainedModel,
             load_tf_weights_in_roformer,
@@ -7667,7 +7620,6 @@ if TYPE_CHECKING:
             SegformerDecodeHead,
             SegformerForImageClassification,
             SegformerForSemanticSegmentation,
-            SegformerLayer,
             SegformerModel,
             SegformerPreTrainedModel,
         )
@@ -7712,7 +7664,6 @@ if TYPE_CHECKING:
         from .models.splinter import (
             SplinterForPreTraining,
             SplinterForQuestionAnswering,
-            SplinterLayer,
             SplinterModel,
             SplinterPreTrainedModel,
         )
@@ -7723,7 +7674,6 @@ if TYPE_CHECKING:
             SqueezeBertForSequenceClassification,
             SqueezeBertForTokenClassification,
             SqueezeBertModel,
-            SqueezeBertModule,
             SqueezeBertPreTrainedModel,
         )
         from .models.stablelm import (
@@ -7872,7 +7822,6 @@ if TYPE_CHECKING:
             ViltForMaskedLM,
             ViltForQuestionAnswering,
             ViltForTokenClassification,
-            ViltLayer,
             ViltModel,
             ViltPreTrainedModel,
         )
@@ -7888,7 +7837,6 @@ if TYPE_CHECKING:
             VisualBertForQuestionAnswering,
             VisualBertForRegionToPhraseAlignment,
             VisualBertForVisualReasoning,
-            VisualBertLayer,
             VisualBertModel,
             VisualBertPreTrainedModel,
         )
@@ -7900,7 +7848,6 @@ if TYPE_CHECKING:
         )
         from .models.vit_mae import (
             ViTMAEForPreTraining,
-            ViTMAELayer,
             ViTMAEModel,
             ViTMAEPreTrainedModel,
         )
@@ -8042,7 +7989,6 @@ if TYPE_CHECKING:
             YosoForQuestionAnswering,
             YosoForSequenceClassification,
             YosoForTokenClassification,
-            YosoLayer,
             YosoModel,
             YosoPreTrainedModel,
         )
@@ -8176,7 +8122,6 @@ if TYPE_CHECKING:
             TFBartPretrainedModel,
         )
         from .models.bert import (
-            TFBertEmbeddings,
             TFBertForMaskedLM,
             TFBertForMultipleChoice,
             TFBertForNextSentencePrediction,
@@ -8230,7 +8175,6 @@ if TYPE_CHECKING:
             TFConvBertForQuestionAnswering,
             TFConvBertForSequenceClassification,
             TFConvBertForTokenClassification,
-            TFConvBertLayer,
             TFConvBertModel,
             TFConvBertPreTrainedModel,
         )
@@ -8415,7 +8359,6 @@ if TYPE_CHECKING:
             TFLongformerForTokenClassification,
             TFLongformerModel,
             TFLongformerPreTrainedModel,
-            TFLongformerSelfAttention,
         )
         from .models.lxmert import (
             TFLxmertForPreTraining,
@@ -8505,7 +8448,6 @@ if TYPE_CHECKING:
             TFRemBertForQuestionAnswering,
             TFRemBertForSequenceClassification,
             TFRemBertForTokenClassification,
-            TFRemBertLayer,
             TFRemBertModel,
             TFRemBertPreTrainedModel,
         )
@@ -8543,7 +8485,6 @@ if TYPE_CHECKING:
             TFRoFormerForQuestionAnswering,
             TFRoFormerForSequenceClassification,
             TFRoFormerForTokenClassification,
-            TFRoFormerLayer,
             TFRoFormerModel,
             TFRoFormerPreTrainedModel,
         )
src/transformers/models/albert/__init__.py
@@ -11,165 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 from typing import TYPE_CHECKING
 
-from ...utils import (
-    OptionalDependencyNotAvailable,
-    _LazyModule,
-    is_flax_available,
-    is_sentencepiece_available,
-    is_tf_available,
-    is_tokenizers_available,
-    is_torch_available,
-)
-
-
-_import_structure = {
-    "configuration_albert": ["AlbertConfig", "AlbertOnnxConfig"],
-}
-
-try:
-    if not is_sentencepiece_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["tokenization_albert"] = ["AlbertTokenizer"]
-
-try:
-    if not is_tokenizers_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["tokenization_albert_fast"] = ["AlbertTokenizerFast"]
-
-try:
-    if not is_torch_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["modeling_albert"] = [
-        "AlbertForMaskedLM",
-        "AlbertForMultipleChoice",
-        "AlbertForPreTraining",
-        "AlbertForQuestionAnswering",
-        "AlbertForSequenceClassification",
-        "AlbertForTokenClassification",
-        "AlbertModel",
-        "AlbertPreTrainedModel",
-        "load_tf_weights_in_albert",
-    ]
-
-try:
-    if not is_tf_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["modeling_tf_albert"] = [
-        "TFAlbertForMaskedLM",
-        "TFAlbertForMultipleChoice",
-        "TFAlbertForPreTraining",
-        "TFAlbertForQuestionAnswering",
-        "TFAlbertForSequenceClassification",
-        "TFAlbertForTokenClassification",
-        "TFAlbertMainLayer",
-        "TFAlbertModel",
-        "TFAlbertPreTrainedModel",
-    ]
-
-try:
-    if not is_flax_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["modeling_flax_albert"] = [
-        "FlaxAlbertForMaskedLM",
-        "FlaxAlbertForMultipleChoice",
-        "FlaxAlbertForPreTraining",
-        "FlaxAlbertForQuestionAnswering",
-        "FlaxAlbertForSequenceClassification",
-        "FlaxAlbertForTokenClassification",
-        "FlaxAlbertModel",
-        "FlaxAlbertPreTrainedModel",
-    ]
-
+from ...utils import _LazyModule
+from ...utils.import_utils import define_import_structure
 
 if TYPE_CHECKING:
-    from .configuration_albert import AlbertConfig, AlbertOnnxConfig
-
-    try:
-        if not is_sentencepiece_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .tokenization_albert import AlbertTokenizer
-
-    try:
-        if not is_tokenizers_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .tokenization_albert_fast import AlbertTokenizerFast
-
-    try:
-        if not is_torch_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .modeling_albert import (
-            AlbertForMaskedLM,
-            AlbertForMultipleChoice,
-            AlbertForPreTraining,
-            AlbertForQuestionAnswering,
-            AlbertForSequenceClassification,
-            AlbertForTokenClassification,
-            AlbertModel,
-            AlbertPreTrainedModel,
-            load_tf_weights_in_albert,
-        )
-
-    try:
-        if not is_tf_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .modeling_tf_albert import (
-            TFAlbertForMaskedLM,
-            TFAlbertForMultipleChoice,
-            TFAlbertForPreTraining,
-            TFAlbertForQuestionAnswering,
-            TFAlbertForSequenceClassification,
-            TFAlbertForTokenClassification,
-            TFAlbertMainLayer,
-            TFAlbertModel,
-            TFAlbertPreTrainedModel,
-        )
-
-    try:
-        if not is_flax_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .modeling_flax_albert import (
-            FlaxAlbertForMaskedLM,
-            FlaxAlbertForMultipleChoice,
-            FlaxAlbertForPreTraining,
-            FlaxAlbertForQuestionAnswering,
-            FlaxAlbertForSequenceClassification,
-            FlaxAlbertForTokenClassification,
-            FlaxAlbertModel,
-            FlaxAlbertPreTrainedModel,
-        )
-
+    from .configuration_albert import *
+    from .modeling_albert import *
+    from .modeling_flax_albert import *
+    from .modeling_tf_albert import *
+    from .tokenization_albert import *
+    from .tokenization_albert_fast import *
 else:
     import sys
 
-    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
+    _file = globals()["__file__"]
+    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
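Note that because `_LazyModule` still backs the package, the user-facing import surface is unchanged by this rewrite: names are resolved on first attribute access, with backend guards now derived from the export metadata instead of the hand-written try/except blocks. A quick usage sketch (assuming torch and sentencepiece are installed):

from transformers import AlbertModel                     # resolved lazily, torch-backed
from transformers.models.albert import AlbertTokenizer  # sentencepiece-backed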
src/transformers/models/albert/configuration_albert.py
@@ -165,3 +165,6 @@ class AlbertOnnxConfig(OnnxConfig):
                 ("token_type_ids", dynamic_axis),
             ]
         )
+
+
+__all__ = ["AlbertConfig", "AlbertOnnxConfig"]
src/transformers/models/albert/modeling_albert.py
@@ -1466,3 +1466,16 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel):
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
+
+
+__all__ = [
+    "load_tf_weights_in_albert",
+    "AlbertPreTrainedModel",
+    "AlbertModel",
+    "AlbertForPreTraining",
+    "AlbertForMaskedLM",
+    "AlbertForSequenceClassification",
+    "AlbertForTokenClassification",
+    "AlbertForQuestionAnswering",
+    "AlbertForMultipleChoice",
+]
src/transformers/models/albert/modeling_flax_albert.py
@@ -1119,3 +1119,14 @@ append_call_sample_docstring(
     FlaxQuestionAnsweringModelOutput,
     _CONFIG_FOR_DOC,
 )
+
+__all__ = [
+    "FlaxAlbertPreTrainedModel",
+    "FlaxAlbertModel",
+    "FlaxAlbertForPreTraining",
+    "FlaxAlbertForMaskedLM",
+    "FlaxAlbertForSequenceClassification",
+    "FlaxAlbertForMultipleChoice",
+    "FlaxAlbertForTokenClassification",
+    "FlaxAlbertForQuestionAnswering",
+]
src/transformers/models/albert/modeling_tf_albert.py
@@ -1558,3 +1558,16 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
         if getattr(self, "classifier", None) is not None:
             with tf.name_scope(self.classifier.name):
                 self.classifier.build([None, None, self.config.hidden_size])
+
+
+__all__ = [
+    "TFAlbertPreTrainedModel",
+    "TFAlbertModel",
+    "TFAlbertForPreTraining",
+    "TFAlbertForMaskedLM",
+    "TFAlbertForSequenceClassification",
+    "TFAlbertForTokenClassification",
+    "TFAlbertForQuestionAnswering",
+    "TFAlbertForMultipleChoice",
+    "TFAlbertMainLayer",
+]
src/transformers/models/albert/tokenization_albert.py
@@ -23,6 +23,7 @@ import sentencepiece as spm
 
 from ...tokenization_utils import AddedToken, PreTrainedTokenizer
 from ...utils import logging
+from ...utils.import_utils import export
 
 
 logger = logging.get_logger(__name__)
@@ -32,6 +33,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}
 SPIECE_UNDERLINE = "▁"
 
 
+@export(backends=("sentencepiece",))
 class AlbertTokenizer(PreTrainedTokenizer):
     """
     Construct an ALBERT tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece).
@@ -343,3 +345,6 @@ class AlbertTokenizer(PreTrainedTokenizer):
                 fi.write(content_spiece_model)
 
         return (out_vocab_file,)
+
+
+__all__ = ["AlbertTokenizer"]
src/transformers/models/albert/tokenization_albert_fast.py
@@ -207,3 +207,6 @@ class AlbertTokenizerFast(PreTrainedTokenizerFast):
             copyfile(self.vocab_file, out_vocab_file)
 
         return (out_vocab_file,)
+
+
+__all__ = ["AlbertTokenizerFast"]
src/transformers/models/align/__init__.py
@@ -13,57 +13,16 @@
 # limitations under the License.
 from typing import TYPE_CHECKING
 
-from ...utils import (
-    OptionalDependencyNotAvailable,
-    _LazyModule,
-    is_torch_available,
-)
-
-
-_import_structure = {
-    "configuration_align": [
-        "AlignConfig",
-        "AlignTextConfig",
-        "AlignVisionConfig",
-    ],
-    "processing_align": ["AlignProcessor"],
-}
-
-try:
-    if not is_torch_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["modeling_align"] = [
-        "AlignModel",
-        "AlignPreTrainedModel",
-        "AlignTextModel",
-        "AlignVisionModel",
-    ]
-
+from ...utils import _LazyModule
+from ...utils.import_utils import define_import_structure
 
 if TYPE_CHECKING:
-    from .configuration_align import (
-        AlignConfig,
-        AlignTextConfig,
-        AlignVisionConfig,
-    )
-    from .processing_align import AlignProcessor
-
-    try:
-        if not is_torch_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .modeling_align import (
-            AlignModel,
-            AlignPreTrainedModel,
-            AlignTextModel,
-            AlignVisionModel,
-        )
-
+    from .configuration_align import *
+    from .modeling_align import *
+    from .processing_align import *
 else:
     import sys
 
-    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
+    _file = globals()["__file__"]
+    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
src/transformers/models/align/configuration_align.py
@@ -378,3 +378,6 @@ class AlignConfig(PretrainedConfig):
         """
 
         return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs)
+
+
+__all__ = ["AlignTextConfig", "AlignVisionConfig", "AlignConfig"]
src/transformers/models/align/modeling_align.py
@@ -1636,3 +1636,6 @@ class AlignModel(AlignPreTrainedModel):
             text_model_output=text_outputs,
             vision_model_output=vision_outputs,
         )
+
+
+__all__ = ["AlignPreTrainedModel", "AlignTextModel", "AlignVisionModel", "AlignModel"]
src/transformers/models/align/processing_align.py
@@ -162,3 +162,6 @@ class AlignProcessor(ProcessorMixin):
         tokenizer_input_names = self.tokenizer.model_input_names
         image_processor_input_names = self.image_processor.model_input_names
         return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+
+
+__all__ = ["AlignProcessor"]
src/transformers/models/altclip/__init__.py
@@ -13,55 +13,16 @@
 # limitations under the License.
 from typing import TYPE_CHECKING
 
-from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available
-
-
-_import_structure = {
-    "configuration_altclip": [
-        "AltCLIPConfig",
-        "AltCLIPTextConfig",
-        "AltCLIPVisionConfig",
-    ],
-    "processing_altclip": ["AltCLIPProcessor"],
-}
-
-try:
-    if not is_torch_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["modeling_altclip"] = [
-        "AltCLIPPreTrainedModel",
-        "AltCLIPModel",
-        "AltCLIPTextModel",
-        "AltCLIPVisionModel",
-    ]
-
+from ...utils import _LazyModule
+from ...utils.import_utils import define_import_structure
 
 if TYPE_CHECKING:
-    from .configuration_altclip import (
-        AltCLIPConfig,
-        AltCLIPTextConfig,
-        AltCLIPVisionConfig,
-    )
-    from .processing_altclip import AltCLIPProcessor
-
-    try:
-        if not is_torch_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .modeling_altclip import (
-            AltCLIPModel,
-            AltCLIPPreTrainedModel,
-            AltCLIPTextModel,
-            AltCLIPVisionModel,
-        )
-
+    from .configuration_altclip import *
+    from .modeling_altclip import *
+    from .processing_altclip import *
 else:
     import sys
 
-    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
+    _file = globals()["__file__"]
+    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
src/transformers/models/altclip/configuration_altclip.py
@@ -398,3 +398,6 @@ class AltCLIPConfig(PretrainedConfig):
         """
 
         return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs)
+
+
+__all__ = ["AltCLIPTextConfig", "AltCLIPVisionConfig", "AltCLIPConfig"]
src/transformers/models/altclip/modeling_altclip.py
@@ -1694,3 +1694,6 @@ def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_l
     mask = input_ids.ne(padding_idx).int()
     incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
     return incremental_indices.long() + padding_idx
+
+
+__all__ = ["AltCLIPPreTrainedModel", "AltCLIPVisionModel", "AltCLIPTextModel", "AltCLIPModel"]
src/transformers/models/altclip/processing_altclip.py
@@ -130,3 +130,6 @@ class AltCLIPProcessor(ProcessorMixin):
         tokenizer_input_names = self.tokenizer.model_input_names
         image_processor_input_names = self.image_processor.model_input_names
         return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+
+
+__all__ = ["AltCLIPProcessor"]
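Finally, the torch dummy-objects file (src/transformers/utils/dummy_pt_objects.py) is regenerated to match: each `*Layer`/`SelfAttention`-style class that is no longer exported loses its dummy, and the newly exported `MarianPreTrainedModel` gains one. The generated stub pattern itself is unchanged; as a reference, this is what each entry in the diff below does (instantiation without torch raises via `requires_backends`):

# Generated stub pattern (as in the diff below): DummyObject classes exist so
# top-level imports work without torch installed; actually instantiating one
# raises a clear "this requires torch" error instead of an AttributeError.
class MarianPreTrainedModel(metaclass=DummyObject):
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch"])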
src/transformers/utils/dummy_pt_objects.py
@@ -1262,13 +1262,6 @@ class BertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class BertLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class BertLMHeadModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -1368,13 +1361,6 @@ class BigBirdForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class BigBirdLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class BigBirdModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -1862,13 +1848,6 @@ class CanineForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class CanineLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class CanineModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -2230,13 +2209,6 @@ class ConvBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class ConvBertLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class ConvBertModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -3144,13 +3116,6 @@ class QDQBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class QDQBertLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class QDQBertLMHeadModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -4133,13 +4098,6 @@ class FNetForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class FNetLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class FNetModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -4572,13 +4530,6 @@ class GPTNeoXForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class GPTNeoXLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class GPTNeoXModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -4600,13 +4551,6 @@ class GPTNeoXJapaneseForCausalLM(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class GPTNeoXJapaneseLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class GPTNeoXJapaneseModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -5437,13 +5381,6 @@ class LongformerPreTrainedModel(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LongformerSelfAttention(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class LongT5EncoderModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -5584,13 +5521,6 @@ class LxmertVisualFeatureEncoder(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LxmertXLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class M2M100ForConditionalGeneration(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -5675,6 +5605,13 @@ class MarianMTModel(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
+class MarianPreTrainedModel(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+
 class MarkupLMForQuestionAnswering(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -6011,13 +5948,6 @@ class MobileBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class MobileBertLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MobileBertModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -6184,13 +6114,6 @@ class MPNetForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class MPNetLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MPNetModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -6562,13 +6485,6 @@ class NystromformerForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class NystromformerLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class NystromformerModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -6993,13 +6909,6 @@ class PerceiverForSequenceClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class PerceiverLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class PerceiverModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -7469,13 +7378,6 @@ class RecurrentGemmaPreTrainedModel(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class ReformerAttention(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class ReformerForMaskedLM(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -7497,13 +7399,6 @@ class ReformerForSequenceClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class ReformerLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class ReformerModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -7588,13 +7483,6 @@ class RemBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class RemBertLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class RemBertModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -7802,13 +7690,6 @@ class RoCBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class RoCBertLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class RoCBertModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -7869,13 +7750,6 @@ class RoFormerForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class RoFormerLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class RoFormerModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -8097,13 +7971,6 @@ class SegformerForSemanticSegmentation(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class SegformerLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class SegformerModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -8314,13 +8181,6 @@ class SplinterForQuestionAnswering(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class SplinterLayer(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class SplinterModel(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -8377,13 +8237,6 @@ class SqueezeBertModel(metaclass=DummyObject):
         requires_backends(self, ["torch"])
|
requires_backends(self, ["torch"])
|
||||||
|
|
||||||
|
|
||||||
class SqueezeBertModule(metaclass=DummyObject):
|
|
||||||
_backends = ["torch"]
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
requires_backends(self, ["torch"])
|
|
||||||
|
|
||||||
|
|
||||||
class SqueezeBertPreTrainedModel(metaclass=DummyObject):
|
class SqueezeBertPreTrainedModel(metaclass=DummyObject):
|
||||||
_backends = ["torch"]
|
_backends = ["torch"]
|
||||||
|
|
||||||
@ -9092,13 +8945,6 @@ class ViltForTokenClassification(metaclass=DummyObject):
|
|||||||
requires_backends(self, ["torch"])
|
requires_backends(self, ["torch"])
|
||||||
|
|
||||||
|
|
||||||
class ViltLayer(metaclass=DummyObject):
|
|
||||||
_backends = ["torch"]
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
requires_backends(self, ["torch"])
|
|
||||||
|
|
||||||
|
|
||||||
class ViltModel(metaclass=DummyObject):
|
class ViltModel(metaclass=DummyObject):
|
||||||
_backends = ["torch"]
|
_backends = ["torch"]
|
||||||
|
|
||||||
@ -9176,13 +9022,6 @@ class VisualBertForVisualReasoning(metaclass=DummyObject):
|
|||||||
requires_backends(self, ["torch"])
|
requires_backends(self, ["torch"])
|
||||||
|
|
||||||
|
|
||||||
class VisualBertLayer(metaclass=DummyObject):
|
|
||||||
_backends = ["torch"]
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
requires_backends(self, ["torch"])
|
|
||||||
|
|
||||||
|
|
||||||
class VisualBertModel(metaclass=DummyObject):
|
class VisualBertModel(metaclass=DummyObject):
|
||||||
_backends = ["torch"]
|
_backends = ["torch"]
|
||||||
|
|
||||||
@ -9232,13 +9071,6 @@ class ViTMAEForPreTraining(metaclass=DummyObject):
|
|||||||
requires_backends(self, ["torch"])
|
requires_backends(self, ["torch"])
|
||||||
|
|
||||||
|
|
||||||
class ViTMAELayer(metaclass=DummyObject):
|
|
||||||
_backends = ["torch"]
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
requires_backends(self, ["torch"])
|
|
||||||
|
|
||||||
|
|
||||||
class ViTMAEModel(metaclass=DummyObject):
|
class ViTMAEModel(metaclass=DummyObject):
|
||||||
_backends = ["torch"]
|
_backends = ["torch"]
|
||||||
|
|
||||||
@ -9957,13 +9789,6 @@ class YosoForTokenClassification(metaclass=DummyObject):
|
|||||||
requires_backends(self, ["torch"])
|
requires_backends(self, ["torch"])
|
||||||
|
|
||||||
|
|
||||||
class YosoLayer(metaclass=DummyObject):
|
|
||||||
_backends = ["torch"]
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
requires_backends(self, ["torch"])
|
|
||||||
|
|
||||||
|
|
||||||
class YosoModel(metaclass=DummyObject):
|
class YosoModel(metaclass=DummyObject):
|
||||||
_backends = ["torch"]
|
_backends = ["torch"]
|
||||||
|
|
||||||
|
src/transformers/utils/dummy_sentencepiece_objects.py
@@ -128,14 +128,14 @@ class MarianTokenizer(metaclass=DummyObject):
         requires_backends(self, ["sentencepiece"])
 
 
-class MBart50Tokenizer(metaclass=DummyObject):
+class MBartTokenizer(metaclass=DummyObject):
     _backends = ["sentencepiece"]
 
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["sentencepiece"])
 
 
-class MBartTokenizer(metaclass=DummyObject):
+class MBart50Tokenizer(metaclass=DummyObject):
     _backends = ["sentencepiece"]
 
     def __init__(self, *args, **kwargs):
src/transformers/utils/dummy_tf_objects.py
@@ -478,13 +478,6 @@ class TFBartPretrainedModel(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
-class TFBertEmbeddings(metaclass=DummyObject):
-    _backends = ["tf"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["tf"])
-
-
 class TFBertForMaskedLM(metaclass=DummyObject):
     _backends = ["tf"]
 
@@ -772,13 +765,6 @@ class TFConvBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
-class TFConvBertLayer(metaclass=DummyObject):
-    _backends = ["tf"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["tf"])
-
-
 class TFConvBertModel(metaclass=DummyObject):
     _backends = ["tf"]
 
@@ -1717,13 +1703,6 @@ class TFLongformerPreTrainedModel(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
-class TFLongformerSelfAttention(metaclass=DummyObject):
-    _backends = ["tf"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["tf"])
-
-
 class TFLxmertForPreTraining(metaclass=DummyObject):
     _backends = ["tf"]
 
@@ -2179,13 +2158,6 @@ class TFRemBertForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
-class TFRemBertLayer(metaclass=DummyObject):
-    _backends = ["tf"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["tf"])
-
-
 class TFRemBertModel(metaclass=DummyObject):
     _backends = ["tf"]
 
@@ -2389,13 +2361,6 @@ class TFRoFormerForTokenClassification(metaclass=DummyObject):
         requires_backends(self, ["tf"])
 
 
-class TFRoFormerLayer(metaclass=DummyObject):
-    _backends = ["tf"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["tf"])
-
-
 class TFRoFormerModel(metaclass=DummyObject):
     _backends = ["tf"]
 
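Every dummy object above follows one template: the `DummyObject` metaclass keeps the class importable when its backend is absent, and `requires_backends` raises an ImportError naming the missing backend as soon as the class is instantiated (or most of its class attributes are touched). A minimal sketch of that mechanism, reusing one of the removed names purely for illustration:

    from transformers.utils import DummyObject, requires_backends


    class TFRoFormerLayer(metaclass=DummyObject):
        _backends = ["tf"]

        def __init__(self, *args, **kwargs):
            # Raises ImportError with the TensorFlow installation message.
            requires_backends(self, ["tf"])

The removed `*Layer`/`*Attention`/`*Embeddings`/`*Module` dummies correspond to internal classes that are no longer part of the public import structure; the test diffs later in this commit import them directly from their modeling modules instead.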
src/transformers/utils/import_utils.py
@@ -15,6 +15,7 @@
 Import utilities: Utilities related to imports and our lazy inits.
 """
 
+import importlib.machinery
 import importlib.metadata
 import importlib.util
 import json
@@ -27,7 +28,7 @@ from collections import OrderedDict
 from functools import lru_cache
 from itertools import chain
 from types import ModuleType
-from typing import Any, Optional, Tuple, Union
+from typing import Any, Dict, FrozenSet, Optional, Set, Tuple, Union
 
 from packaging import version
 
@@ -1386,6 +1387,11 @@ explained here: https://www.tensorflow.org/text/guide/tf_text_intro.
 Please note that you may need to restart your runtime after installation.
 """
 
+# docstyle-ignore
+TORCHAUDIO_IMPORT_ERROR = """
+{0} requires the torchaudio library but it was not found in your environment. Please install it and restart your
+runtime.
+"""
+
 # docstyle-ignore
 PANDAS_IMPORT_ERROR = """
@@ -1561,6 +1567,7 @@ BACKENDS_MAPPING = OrderedDict(
         ("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
         ("tensorflow_text", (is_tensorflow_text_available, TENSORFLOW_TEXT_IMPORT_ERROR)),
         ("timm", (is_timm_available, TIMM_IMPORT_ERROR)),
+        ("torchaudio", (is_torchaudio_available, TORCHAUDIO_IMPORT_ERROR)),
         ("natten", (is_natten_available, NATTEN_IMPORT_ERROR)),
         ("nltk", (is_nltk_available, NLTK_IMPORT_ERROR)),
         ("tokenizers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
@@ -1619,6 +1626,10 @@ def is_torch_fx_proxy(x):
     return False
 
 
+BACKENDS_T = FrozenSet[str]
+IMPORT_STRUCTURE_T = Dict[BACKENDS_T, Dict[str, Set[str]]]
+
+
 class _LazyModule(ModuleType):
     """
     Module class that surfaces all objects but only performs associated imports when the objects are requested.
@@ -1626,8 +1637,58 @@ class _LazyModule(ModuleType):
 
     # Very heavily inspired by optuna.integration._IntegrationModule
     # https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py
-    def __init__(self, name, module_file, import_structure, module_spec=None, extra_objects=None):
+    def __init__(
+        self,
+        name: str,
+        module_file: str,
+        import_structure: IMPORT_STRUCTURE_T,
+        module_spec: importlib.machinery.ModuleSpec = None,
+        extra_objects: Dict[str, object] = None,
+    ):
         super().__init__(name)
+
+        self._object_missing_backend = {}
+        if any(isinstance(key, frozenset) for key in import_structure.keys()):
+            self._modules = set()
+            self._class_to_module = {}
+            self.__all__ = []
+
+            _import_structure = {}
+
+            for backends, module in import_structure.items():
+                missing_backends = []
+                for backend in backends:
+                    if backend not in BACKENDS_MAPPING:
+                        raise ValueError(
+                            f"Error: the following backend: '{backend}' was specified around object {module} but isn't specified in the backends mapping."
+                        )
+                    callable, error = BACKENDS_MAPPING[backend]
+                    if not callable():
+                        missing_backends.append(backend)
+                self._modules = self._modules.union(set(module.keys()))
+
+                for key, values in module.items():
+                    if len(missing_backends):
+                        self._object_missing_backend[key] = missing_backends
+
+                    for value in values:
+                        self._class_to_module[value] = key
+                        if len(missing_backends):
+                            self._object_missing_backend[value] = missing_backends
+                    _import_structure.setdefault(key, []).extend(values)
+
+                # Needed for autocompletion in an IDE
+                self.__all__.extend(list(module.keys()) + list(chain(*module.values())))
+
+            self.__file__ = module_file
+            self.__spec__ = module_spec
+            self.__path__ = [os.path.dirname(module_file)]
+            self._objects = {} if extra_objects is None else extra_objects
+            self._name = name
+            self._import_structure = _import_structure
+
+        # This can be removed once every exportable object has a `export()` export.
+        else:
-        self._modules = set(import_structure.keys())
-        self._class_to_module = {}
-        for key, values in import_structure.items():
+            self._modules = set(import_structure.keys())
+            self._class_to_module = {}
+            for key, values in import_structure.items():
@@ -1657,6 +1718,19 @@ class _LazyModule(ModuleType):
             return self._objects[name]
         if name in self._modules:
             value = self._get_module(name)
+        elif name in self._object_missing_backend.keys():
+            missing_backends = self._object_missing_backend[name]
+
+            class Placeholder(metaclass=DummyObject):
+                _backends = missing_backends
+
+                def __init__(self, *args, **kwargs):
+                    requires_backends(self, missing_backends)
+
+            Placeholder.__name__ = name
+            Placeholder.__module__ = self.__spec__
+
+            value = Placeholder
         elif name in self._class_to_module.keys():
             module = self._get_module(self._class_to_module[name])
             value = getattr(module, name)
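The effect of the new `elif` branch, as a hedged sketch (assuming an environment without PyTorch): importing a torch-backed symbol still succeeds, and the failure is deferred to first use, mirroring what the hand-written dummy files did before.

    # Sketch only: Placeholder is generated on attribute access by _LazyModule.
    from transformers import BertModel  # returns a Placeholder, no ImportError yet

    try:
        BertModel()  # now raises ImportError naming the missing "torch" backend
    except ImportError as err:
        print(err)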
@@ -1700,3 +1774,385 @@ def direct_transformers_import(path: str, file="__init__.py") -> ModuleType:
     spec.loader.exec_module(module)
     module = sys.modules[name]
     return module
+
+
+def export(*, backends=()):
+    """
+    This decorator enables two things:
+    - Attaching a `__backends` tuple to an object to see what are the necessary backends for it
+      to execute correctly without instantiating it
+    - The '@export' string is used to dynamically import objects
+    """
+    for backend in backends:
+        if backend not in BACKENDS_MAPPING:
+            raise ValueError(f"Backend should be defined in the BACKENDS_MAPPING. Offending backend: {backend}")
+
+    if not isinstance(backends, tuple):
+        raise ValueError("Backends should be a tuple.")
+
+    def inner_fn(fun):
+        fun.__backends = backends
+        return fun
+
+    return inner_fn
+
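Usage of the decorator mirrors the test fixtures added at the end of this commit; a minimal sketch (the class name is hypothetical):

    from transformers.utils.import_utils import export


    @export(backends=("torch",))
    class MyTorchOnlyModule:  # hypothetical example class
        pass

Note that `export` validates its `backends` argument eagerly, so an unknown backend fails at definition time rather than when the import structure is later parsed.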
+
+BASE_FILE_REQUIREMENTS = {
+    lambda e: "modeling_tf_" in e: ("tf",),
+    lambda e: "modeling_flax_" in e: ("flax",),
+    lambda e: "modeling_" in e: ("torch",),
+    lambda e: e.startswith("tokenization_") and e.endswith("_fast"): ("tokenizers",),
+}
+
+
+def fetch__all__(file_content):
+    """
+    Returns the content of the __all__ variable in the file content.
+    Returns an empty list if __all__ is not defined, otherwise returns a list of strings.
+    """
+
+    if "__all__" not in file_content:
+        return []
+
+    lines = file_content.splitlines()
+    for index, line in enumerate(lines):
+        if line.startswith("__all__"):
+            start_index = index
+
+    lines = lines[start_index:]
+
+    if not lines[0].startswith("__all__"):
+        raise ValueError(
+            "fetch__all__ accepts a list of lines, with the first line being the __all__ variable declaration"
+        )
+
+    # __all__ is defined on a single line
+    if lines[0].endswith("]"):
+        return [obj.strip("\"' ") for obj in lines[0].split("=")[1].strip(" []").split(",")]
+
+    # __all__ is defined on multiple lines
+    else:
+        _all = []
+        for __all__line_index in range(1, len(lines)):
+            if lines[__all__line_index].strip() == "]":
+                return _all
+            else:
+                _all.append(lines[__all__line_index].strip("\"', "))
+
+        return _all
+
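A quick illustration of `fetch__all__` (a sketch, not part of the patch), covering both the single-line and the multi-line `__all__` forms it parses:

    single = '__all__ = ["AlbertConfig", "AlbertOnnxConfig"]'
    multi = '__all__ = [\n    "AlbertConfig",\n    "AlbertOnnxConfig",\n]'

    # Both forms parse to the same list of exported names.
    assert fetch__all__(single) == ["AlbertConfig", "AlbertOnnxConfig"]
    assert fetch__all__(multi) == ["AlbertConfig", "AlbertOnnxConfig"]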
+
+@lru_cache()
+def create_import_structure_from_path(module_path):
+    """
+    This method takes the path to a file/a folder and returns the import structure.
+    If a file is given, it will return the import structure of the parent folder.
+
+    Import structures are designed to be digestible by `_LazyModule` objects. They are
+    created from the __all__ definitions in each file as well as the `@export` decorators
+    above methods and objects.
+
+    The import structure allows explicit display of the required backends for a given object.
+    These backends are specified in two ways:
+
+    1. Through their `@export`, if they are exported with that decorator. This `@export` decorator
+       accepts a `backends` tuple kwarg mentioning which backends are required to run this object.
+
+    2. If an object is defined in a file with "default" backends, it will have, at a minimum, this
+       backend specified. The default backends are defined according to the filename:
+
+       - If a file is named like `modeling_*.py`, it will have a `torch` backend
+       - If a file is named like `modeling_tf_*.py`, it will have a `tf` backend
+       - If a file is named like `modeling_flax_*.py`, it will have a `flax` backend
+       - If a file is named like `tokenization_*_fast.py`, it will have a `tokenizers` backend
+
+    Backends serve the purpose of displaying a clear error message to the user in case the backends are not installed.
+    Should an object be imported without its required backends being in the environment, any attempt to use the
+    object will raise an error mentioning which backend(s) should be added to the environment in order to use
+    that object.
+
+    Here's an example of an input import structure at the src.transformers.models level:
+
+    {
+        'albert': {
+            frozenset(): {
+                'configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'}
+            },
+            frozenset({'tokenizers'}): {
+                'tokenization_albert_fast': {'AlbertTokenizerFast'}
+            },
+        },
+        'align': {
+            frozenset(): {
+                'configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
+                'processing_align': {'AlignProcessor'}
+            },
+        },
+        'altclip': {
+            frozenset(): {
+                'configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
+                'processing_altclip': {'AltCLIPProcessor'},
+            }
+        }
+    }
+    """
+    import_structure = {}
+    if os.path.isdir(module_path):
+        directory = module_path
+        adjacent_modules = []
+
+        for f in os.listdir(module_path):
+            if f != "__pycache__" and os.path.isdir(os.path.join(module_path, f)):
+                import_structure[f] = create_import_structure_from_path(os.path.join(module_path, f))
+
+            elif not os.path.isdir(os.path.join(directory, f)):
+                adjacent_modules.append(f)
+
+    else:
+        directory = os.path.dirname(module_path)
+        adjacent_modules = [f for f in os.listdir(directory) if not os.path.isdir(os.path.join(directory, f))]
+
+    # We're only taking a look at files different from __init__.py
+    # We could theoretically export things directly from the __init__.py
+    # files, but this is not supported at this time.
+    if "__init__.py" in adjacent_modules:
+        adjacent_modules.remove("__init__.py")
+
+    module_requirements = {}
+    for module_name in adjacent_modules:
+        # Only modules ending in `.py` are accepted here.
+        if not module_name.endswith(".py"):
+            continue
+
+        with open(os.path.join(directory, module_name)) as f:
+            file_content = f.read()
+
+        # Remove the .py suffix
+        module_name = module_name[:-3]
+
+        previous_line = ""
+        previous_index = 0
+
+        # Some files have some requirements by default.
+        # For example, any file named `modeling_tf_xxx.py`
+        # should have TensorFlow as a required backend.
+        base_requirements = ()
+        for string_check, requirements in BASE_FILE_REQUIREMENTS.items():
+            if string_check(module_name):
+                base_requirements = requirements
+                break
+
+        # Objects that have a `@export` assigned to them will get exported
+        # with the backends specified in the decorator as well as the file backends.
+        exported_objects = set()
+        if "@export" in file_content:
+            lines = file_content.split("\n")
+            for index, line in enumerate(lines):
+                # This allows exporting items with other decorators. We'll take a look
+                # at the line that follows at the same indentation level.
+                if line.startswith((" ", "\t", "@", ")")) and not line.startswith("@export"):
+                    continue
+
+                # Skipping line enables putting whatever we want between the
+                # export() call and the actual class/method definition.
+                # This is what enables having # Copied from statements, docs, etc.
+                skip_line = False
+
+                if "@export" in previous_line:
+                    skip_line = False
+
+                    # Backends are defined on the same line as export
+                    if "backends" in previous_line:
+                        backends_string = previous_line.split("backends=")[1].split("(")[1].split(")")[0]
+                        backends = tuple(sorted([b.strip("'\",") for b in backends_string.split(", ") if b]))
+
+                    # Backends are defined in the lines following export, for example:
+                    # @export(
+                    #     backends=(
+                    #         "sentencepiece",
+                    #         "torch",
+                    #         "tf",
+                    #     )
+                    # )
+                    #
+                    # or
+                    #
+                    # @export(
+                    #     backends=(
+                    #         "sentencepiece", "tf"
+                    #     )
+                    # )
+                    elif "backends" in lines[previous_index + 1]:
+                        backends = []
+                        for backend_line in lines[previous_index:index]:
+                            if "backends" in backend_line:
+                                backend_line = backend_line.split("=")[1]
+                            if '"' in backend_line or "'" in backend_line:
+                                if ", " in backend_line:
+                                    backends.extend(backend.strip("()\"', ") for backend in backend_line.split(", "))
+                                else:
+                                    backends.append(backend_line.strip("()\"', "))
+
+                            # If the line is only a ')', then we reached the end of the backends and we break.
+                            if backend_line.strip() == ")":
+                                break
+                        backends = tuple(backends)
+
+                    # No backends are registered for export
+                    else:
+                        backends = ()
+
+                    backends = frozenset(backends + base_requirements)
+                    if backends not in module_requirements:
+                        module_requirements[backends] = {}
+                    if module_name not in module_requirements[backends]:
+                        module_requirements[backends][module_name] = set()
+
+                    if not line.startswith("class") and not line.startswith("def"):
+                        skip_line = True
+                    else:
+                        start_index = 6 if line.startswith("class") else 4
+                        object_name = line[start_index:].split("(")[0].strip(":")
+                        module_requirements[backends][module_name].add(object_name)
+                        exported_objects.add(object_name)
+
+                if not skip_line:
+                    previous_line = line
+                    previous_index = index
+
+        # All objects that are in __all__ should be exported by default.
+        # These objects are exported with the file backends.
+        if "__all__" in file_content:
+            for _all_object in fetch__all__(file_content):
+                if _all_object not in exported_objects:
+                    backends = frozenset(base_requirements)
+                    if backends not in module_requirements:
+                        module_requirements[backends] = {}
+                    if module_name not in module_requirements[backends]:
+                        module_requirements[backends][module_name] = set()
+
+                    module_requirements[backends][module_name].add(_all_object)
+
+    import_structure = {**module_requirements, **import_structure}
+    return import_structure
+
+
+def spread_import_structure(nested_import_structure):
+    """
+    This method takes as input an unordered import structure and brings the required backends at the top-level,
+    aggregating modules and objects under their required backends.
+
+    Here's an example of an input import structure at the src.transformers.models level:
+
+    {
+        'albert': {
+            frozenset(): {
+                'configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'}
+            },
+            frozenset({'tokenizers'}): {
+                'tokenization_albert_fast': {'AlbertTokenizerFast'}
+            },
+        },
+        'align': {
+            frozenset(): {
+                'configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
+                'processing_align': {'AlignProcessor'}
+            },
+        },
+        'altclip': {
+            frozenset(): {
+                'configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
+                'processing_altclip': {'AltCLIPProcessor'},
+            }
+        }
+    }
+
+    Here's an example of an output import structure at the src.transformers.models level:
+
+    {
+        frozenset({'tokenizers'}): {
+            'albert.tokenization_albert_fast': {'AlbertTokenizerFast'}
+        },
+        frozenset(): {
+            'albert.configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'},
+            'align.processing_align': {'AlignProcessor'},
+            'align.configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
+            'altclip.configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
+            'altclip.processing_altclip': {'AltCLIPProcessor'}
+        }
+    }
+
+    """
+
+    def propagate_frozenset(unordered_import_structure):
+        tuple_first_import_structure = {}
+        for _key, _value in unordered_import_structure.items():
+            if not isinstance(_value, dict):
+                tuple_first_import_structure[_key] = _value
+
+            elif any(isinstance(v, frozenset) for v in _value.keys()):
+                # Here we want to switch around key and v
+                for k, v in _value.items():
+                    if isinstance(k, frozenset):
+                        if k not in tuple_first_import_structure:
+                            tuple_first_import_structure[k] = {}
+                        tuple_first_import_structure[k][_key] = v
+
+            else:
+                tuple_first_import_structure[_key] = propagate_frozenset(_value)
+
+        return tuple_first_import_structure
+
+    def flatten_dict(_dict, previous_key=None):
+        items = []
+        for _key, _value in _dict.items():
+            _key = f"{previous_key}.{_key}" if previous_key is not None else _key
+            if isinstance(_value, dict):
+                items.extend(flatten_dict(_value, _key).items())
+            else:
+                items.append((_key, _value))
+        return dict(items)
+
+    # The tuples contain the necessary backends. We want these first, so we propagate them up the
+    # import structure.
+    ordered_import_structure = nested_import_structure
+
+    # 6 is a number that gives us sufficient depth to go through all files and foreseeable folder depths
+    # while not taking too long to parse.
+    for i in range(6):
+        ordered_import_structure = propagate_frozenset(ordered_import_structure)
+
+    # We then flatten the dict so that it references a module path.
+    flattened_import_structure = {}
+    for key, value in ordered_import_structure.copy().items():
+        if isinstance(key, str):
+            del ordered_import_structure[key]
+        else:
+            flattened_import_structure[key] = flatten_dict(value)
+
+    return flattened_import_structure
+
+
+def define_import_structure(module_path: str) -> IMPORT_STRUCTURE_T:
+    """
+    This method takes a module_path as input and creates an import structure digestible by a _LazyModule.
+
+    Here's an example of an output import structure at the src.transformers.models level:
+
+    {
+        frozenset({'tokenizers'}): {
+            'albert.tokenization_albert_fast': {'AlbertTokenizerFast'}
+        },
+        frozenset(): {
+            'albert.configuration_albert': {'AlbertConfig', 'AlbertOnnxConfig'},
+            'align.processing_align': {'AlignProcessor'},
+            'align.configuration_align': {'AlignConfig', 'AlignTextConfig', 'AlignVisionConfig'},
+            'altclip.configuration_altclip': {'AltCLIPConfig', 'AltCLIPTextConfig', 'AltCLIPVisionConfig'},
+            'altclip.processing_altclip': {'AltCLIPProcessor'}
+        }
+    }
+
+    The import structure is a dict defined with frozensets as keys, and dicts of strings to sets of objects.
+    """
+    import_structure = create_import_structure_from_path(module_path)
+    return spread_import_structure(import_structure)
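Taken together, `define_import_structure` plus the extended `_LazyModule` let a model `__init__.py` shrink to a handful of lines. A hedged sketch of the resulting pattern (the refactored model inits are part of this PR but shown elsewhere; treat the exact shape as illustrative, with albert standing in as the example from the docstrings above):

    from typing import TYPE_CHECKING

    from transformers.utils import _LazyModule
    from transformers.utils.import_utils import define_import_structure

    if TYPE_CHECKING:
        from .configuration_albert import *  # static analysis sees the real symbols
    else:
        import sys

        _file = globals()["__file__"]
        sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)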
tests/models/longformer/test_modeling_longformer.py
@@ -34,8 +34,8 @@ if is_torch_available():
         LongformerForSequenceClassification,
         LongformerForTokenClassification,
         LongformerModel,
-        LongformerSelfAttention,
     )
+    from transformers.models.longformer.modeling_longformer import LongformerSelfAttention
 
 
 class LongformerModelTester:
tests/models/longformer/test_modeling_tf_longformer.py
@@ -37,8 +37,8 @@ if is_tf_available():
         TFLongformerForSequenceClassification,
         TFLongformerForTokenClassification,
         TFLongformerModel,
-        TFLongformerSelfAttention,
     )
+    from transformers.models.longformer.modeling_tf_longformer import TFLongformerSelfAttention
     from transformers.tf_utils import shape_list
 
tests/models/reformer/test_modeling_reformer.py
@@ -40,11 +40,11 @@ if is_torch_available():
         ReformerForMaskedLM,
         ReformerForQuestionAnswering,
         ReformerForSequenceClassification,
-        ReformerLayer,
         ReformerModel,
         ReformerModelWithLMHead,
         ReformerTokenizer,
     )
+    from transformers.models.reformer.modeling_reformer import ReformerLayer
 
 
 class ReformerModelTester:
tests/utils/import_structures/failing_export.py (new file)
@@ -0,0 +1,23 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# fmt: off
+
+from transformers.utils.import_utils import export
+
+
+@export(backends=("random_item_that_should_not_exist",))
+class A0:
+    def __init__(self):
+        pass
tests/utils/import_structures/import_structure_raw_register.py (new file)
@@ -0,0 +1,80 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# fmt: off
+
+from transformers.utils.import_utils import export
+
+
+@export()
+class A0:
+    def __init__(self):
+        pass
+
+
+@export()
+def a0():
+    pass
+
+
+@export(backends=("torch", "tf"))
+class A1:
+    def __init__(self):
+        pass
+
+
+@export(backends=("torch", "tf"))
+def a1():
+    pass
+
+
+@export(
+    backends=("torch", "tf")
+)
+class A2:
+    def __init__(self):
+        pass
+
+
+@export(
+    backends=("torch", "tf")
+)
+def a2():
+    pass
+
+
+@export(
+    backends=(
+        "torch",
+        "tf"
+    )
+)
+class A3:
+    def __init__(self):
+        pass
+
+
+@export(
+    backends=(
+        "torch",
+        "tf"
+    )
+)
+def a3():
+    pass
+
+
+@export(backends=())
+class A4:
+    def __init__(self):
+        pass
tests/utils/import_structures/import_structure_register_with_comments.py (new file)
@@ -0,0 +1,79 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# fmt: off
+
+from transformers.utils.import_utils import export
+
+
+@export()
+# That's a statement
+class B0:
+    def __init__(self):
+        pass
+
+
+@export()
+# That's a statement
+def b0():
+    pass
+
+
+@export(backends=("torch", "tf"))
+# That's a statement
+class B1:
+    def __init__(self):
+        pass
+
+
+@export(backends=("torch", "tf"))
+# That's a statement
+def b1():
+    pass
+
+
+@export(backends=("torch", "tf"))
+# That's a statement
+class B2:
+    def __init__(self):
+        pass
+
+
+@export(backends=("torch", "tf"))
+# That's a statement
+def b2():
+    pass
+
+
+@export(
+    backends=(
+        "torch",
+        "tf"
+    )
+)
+# That's a statement
+class B3:
+    def __init__(self):
+        pass
+
+
+@export(
+    backends=(
+        "torch",
+        "tf"
+    )
+)
+# That's a statement
+def b3():
+    pass
tests/utils/import_structures/import_structure_register_with_duplicates.py (new file)
@@ -0,0 +1,77 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# fmt: off
+
+from transformers.utils.import_utils import export
+
+
+@export(backends=("torch", "torch"))
+class C0:
+    def __init__(self):
+        pass
+
+
+@export(backends=("torch", "torch"))
+def c0():
+    pass
+
+
+@export(backends=("torch", "torch"))
+# That's a statement
+class C1:
+    def __init__(self):
+        pass
+
+
+@export(backends=("torch", "torch"))
+# That's a statement
+def c1():
+    pass
+
+
+@export(backends=("torch", "torch"))
+# That's a statement
+class C2:
+    def __init__(self):
+        pass
+
+
+@export(backends=("torch", "torch"))
+# That's a statement
+def c2():
+    pass
+
+
+@export(
+    backends=(
+        "torch",
+        "torch"
+    )
+)
+# That's a statement
+class C3:
+    def __init__(self):
+        pass
+
+
+@export(
+    backends=(
+        "torch",
+        "torch"
+    )
+)
+# That's a statement
+def c3():
+    pass
tests/utils/test_import_structure.py (new file)
@@ -0,0 +1,98 @@
+import os
+import unittest
+from pathlib import Path
+
+from transformers.utils.import_utils import define_import_structure, spread_import_structure
+
+
+import_structures = Path("import_structures")
+
+
+def fetch__all__(file_content):
+    """
+    Returns the content of the __all__ variable in the file content.
+    Returns None if not defined, otherwise returns a list of strings.
+    """
+    lines = file_content.split("\n")
+    for line_index in range(len(lines)):
+        line = lines[line_index]
+        if line.startswith("__all__ = "):
+            # __all__ is defined on a single line
+            if line.endswith("]"):
+                return [obj.strip("\"' ") for obj in line.split("=")[1].strip(" []").split(",")]
+
+            # __all__ is defined on multiple lines
+            else:
+                _all = []
+                for __all__line_index in range(line_index + 1, len(lines)):
+                    if lines[__all__line_index].strip() == "]":
+                        return _all
+                    else:
+                        _all.append(lines[__all__line_index].strip("\"', "))
+
+
+class TestImportStructures(unittest.TestCase):
+    base_transformers_path = Path(__file__).parent.parent.parent
+    models_path = base_transformers_path / "src" / "transformers" / "models"
+    models_import_structure = spread_import_structure(define_import_structure(models_path))
+
+    def test_definition(self):
+        import_structure = define_import_structure(import_structures)
+        import_structure_definition = {
+            frozenset(()): {
+                "import_structure_raw_register": {"A0", "a0", "A4"},
+                "import_structure_register_with_comments": {"B0", "b0"},
+            },
+            frozenset(("tf", "torch")): {
+                "import_structure_raw_register": {"A1", "a1", "A2", "a2", "A3", "a3"},
+                "import_structure_register_with_comments": {"B1", "b1", "B2", "b2", "B3", "b3"},
+            },
+            frozenset(("torch",)): {
+                "import_structure_register_with_duplicates": {"C0", "c0", "C1", "c1", "C2", "c2", "C3", "c3"},
+            },
+        }
+
+        self.assertDictEqual(import_structure, import_structure_definition)
+
+    def test_transformers_specific_model_import(self):
+        """
+        This test ensures that there is equivalence between what is written down in __all__ and what is
+        written down with register().
+
+        It doesn't test the backends attributed to register().
+        """
+        for architecture in os.listdir(self.models_path):
+            if (
+                os.path.isfile(self.models_path / architecture)
+                or architecture.startswith("_")
+                or architecture == "deprecated"
+            ):
+                continue
+
+            with self.subTest(f"Testing arch {architecture}"):
+                import_structure = define_import_structure(self.models_path / architecture)
+                backend_agnostic_import_structure = {}
+                for requirement, module_object_mapping in import_structure.items():
+                    for module, objects in module_object_mapping.items():
+                        if module not in backend_agnostic_import_structure:
+                            backend_agnostic_import_structure[module] = []
+
+                        backend_agnostic_import_structure[module].extend(objects)
+
+                for module, objects in backend_agnostic_import_structure.items():
+                    with open(self.models_path / architecture / f"{module}.py") as f:
+                        content = f.read()
+                        _all = fetch__all__(content)
+
+                    if _all is None:
+                        raise ValueError(f"{module} doesn't have __all__ defined.")
+
+                    error_message = (
+                        f"self.models_path / architecture / f'{module}.py doesn't seem to be defined correctly:\n"
+                        f"Defined in __all__: {sorted(_all)}\nDefined with register: {sorted(objects)}"
+                    )
+                    self.assertListEqual(sorted(objects), sorted(_all), msg=error_message)
+
+    def test_export_backend_should_be_defined(self):
+        with self.assertRaisesRegex(ValueError, "Backend should be defined in the BACKENDS_MAPPING"):
+            pass
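The three `import_structure_*` fixtures above feed `test_definition`, while `failing_export.py` exists to exercise the unknown-backend error path; the suite runs in the usual way, e.g. with `python -m pytest tests/utils/test_import_structure.py`.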
utils/custom_init_isort.py
@@ -244,7 +244,7 @@ def sort_imports(file: str, check_only: bool = True):
        code = f.read()
 
    # If the file is not a custom init, there is nothing to do.
-    if "_import_structure" not in code:
+    if "_import_structure" not in code or "define_import_structure" in code:
        return
 
    # Blocks of indent level 0