diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index 6a63a49c6a0..9520c3cc01c 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -51,7 +51,12 @@ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 # Base objects, independent of any specific backend
 _import_structure = {
+    "benchmark": [],
+    "commands": [],
     "configuration_utils": ["PretrainedConfig"],
+    "convert_graph_to_onnx": [],
+    "convert_slow_tokenizers_checkpoints_to_fast": [],
+    "convert_tf_hub_seq_to_seq_bert_to_pytorch": [],
     "data": [
         "DataProcessor",
         "InputExample",
@@ -84,6 +89,11 @@ _import_structure = {
         "DefaultDataCollator",
         "default_data_collator",
     ],
+    "data.metrics": [],
+    "data.processors": [],
+    "debug_utils": [],
+    "dependency_versions_check": [],
+    "dependency_versions_table": [],
     "feature_extraction_sequence_utils": ["SequenceFeatureExtractor"],
     "feature_extraction_utils": ["BatchFeature"],
     "file_utils": [
@@ -179,6 +189,7 @@ _import_structure = {
         "BlenderbotSmallConfig",
         "BlenderbotSmallTokenizer",
     ],
+    "models.bort": [],
     "models.byt5": ["ByT5Tokenizer"],
     "models.camembert": ["CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "CamembertConfig"],
     "models.canine": ["CANINE_PRETRAINED_CONFIG_ARCHIVE_MAP", "CanineConfig", "CanineTokenizer"],
@@ -196,6 +207,7 @@ _import_structure = {
     "models.deberta_v2": ["DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP", "DebertaV2Config"],
     "models.deit": ["DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "DeiTConfig"],
     "models.detr": ["DETR_PRETRAINED_CONFIG_ARCHIVE_MAP", "DetrConfig"],
+    "models.dialogpt": [],
     "models.distilbert": ["DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "DistilBertConfig", "DistilBertTokenizer"],
     "models.dpr": [
         "DPR_PRETRAINED_CONFIG_ARCHIVE_MAP",
@@ -236,6 +248,7 @@ _import_structure = {
     "models.mbart": ["MBartConfig"],
     "models.mbart50": [],
     "models.megatron_bert": ["MEGATRON_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "MegatronBertConfig"],
+    "models.megatron_gpt2": [],
     "models.mluke": [],
     "models.mmbt": ["MMBTConfig"],
     "models.mobilebert": ["MOBILEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "MobileBertConfig", "MobileBertTokenizer"],
@@ -316,6 +329,7 @@ _import_structure = {
     "models.xlm_prophetnet": ["XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "XLMProphetNetConfig"],
     "models.xlm_roberta": ["XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP", "XLMRobertaConfig"],
     "models.xlnet": ["XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "XLNetConfig"],
+    "onnx": [],
     "pipelines": [
         "AudioClassificationPipeline",
         "AutomaticSpeechRecognitionPipeline",
@@ -343,6 +357,7 @@ _import_structure = {
         "ZeroShotClassificationPipeline",
         "pipeline",
     ],
+    "testing_utils": [],
     "tokenization_utils": ["PreTrainedTokenizer"],
     "tokenization_utils_base": [
         "AddedToken",
@@ -567,6 +582,7 @@ else:
 
 # PyTorch-backed objects
 if is_torch_available():
+    _import_structure["activations"] = []
     _import_structure["benchmark.benchmark"] = ["PyTorchBenchmark"]
     _import_structure["benchmark.benchmark_args"] = ["PyTorchBenchmarkArguments"]
     _import_structure["data.datasets"] = [
@@ -580,6 +596,7 @@ if is_torch_available():
         "TextDataset",
         "TextDatasetForNextSentencePrediction",
     ]
+    _import_structure["deepspeed"] = []
     _import_structure["generation_beam_search"] = ["BeamScorer", "BeamSearchScorer"]
     _import_structure["generation_logits_process"] = [
         "ForcedBOSTokenLogitsProcessor",
@@ -1455,6 +1472,7 @@ if is_torch_available():
         "get_polynomial_decay_schedule_with_warmup",
         "get_scheduler",
     ]
+    _import_structure["sagemaker"] = []
     _import_structure["trainer"] = ["Trainer"]
_import_structure["trainer_pt_utils"] = ["torch_distributed_zero_first"] _import_structure["trainer_seq2seq"] = ["Seq2SeqTrainer"] @@ -1465,6 +1483,7 @@ else: # TensorFlow-backed objects if is_tf_available(): + _import_structure["activations_tf"] = [] _import_structure["benchmark.benchmark_args_tf"] = ["TensorFlowBenchmarkArguments"] _import_structure["benchmark.benchmark_tf"] = ["TensorFlowBenchmark"] _import_structure["generation_tf_utils"] = ["tf_top_k_top_p_filtering"] @@ -2129,6 +2148,7 @@ else: name for name in dir(dummy_flax_objects) if not name.startswith("_") ] + # Direct imports for type-checking if TYPE_CHECKING: # Configuration diff --git a/utils/check_inits.py b/utils/check_inits.py index 8cfbfc18a4a..438d657df1e 100644 --- a/utils/check_inits.py +++ b/utils/check_inits.py @@ -14,8 +14,10 @@ # limitations under the License. import collections +import importlib.util import os import re +from pathlib import Path PATH_TO_TRANSFORMERS = "src/transformers" @@ -202,5 +204,58 @@ def check_all_inits(): raise ValueError("\n\n".join(failures)) +def get_transformers_submodules(): + """ + Returns the list of Transformers submodules. + """ + submodules = [] + for path, directories, files in os.walk(PATH_TO_TRANSFORMERS): + for folder in directories: + if folder.startswith("_"): + directories.remove(folder) + continue + short_path = str((Path(path) / folder).relative_to(PATH_TO_TRANSFORMERS)) + submodule = short_path.replace(os.path.sep, ".") + submodules.append(submodule) + for fname in files: + if fname == "__init__.py": + continue + short_path = str((Path(path) / fname).relative_to(PATH_TO_TRANSFORMERS)) + submodule = short_path.replace(os.path.sep, ".").replace(".py", "") + if len(submodule.split(".")) == 1: + submodules.append(submodule) + return submodules + + +IGNORE_SUBMODULES = [ + "convert_pytorch_checkpoint_to_tf2", + "modeling_flax_pytorch_utils", +] + + +def check_submodules(): + # This is to make sure the transformers module imported is the one in the repo. + spec = importlib.util.spec_from_file_location( + "transformers", + os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), + submodule_search_locations=[PATH_TO_TRANSFORMERS], + ) + transformers = spec.loader.load_module() + + module_not_registered = [ + module + for module in get_transformers_submodules() + if module not in IGNORE_SUBMODULES and module not in transformers._import_structure.keys() + ] + if len(module_not_registered) > 0: + list_of_modules = "\n".join(f"- {module}" for module in module_not_registered) + raise ValueError( + "The following submodules are not properly registed in the main init of Transformers:\n" + f"{list_of_modules}\n" + "Make sure they appear somewhere in the keys of `_import_structure` with an empty list as value." + ) + + if __name__ == "__main__": check_all_inits() + check_submodules()