Dummies multi backend (#11100)

* Replaces requires_xxx with one generic method

* Quality and update check_dummies

* Fix inits check

* Post-merge cleanup
Sylvain Gugger 2021-04-07 09:56:40 -04:00 committed by GitHub
parent 424419f549
commit 11505fa139
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 1246 additions and 1275 deletions

View File: src/transformers/__init__.py

@@ -339,9 +339,6 @@ if is_tokenizers_available():
_import_structure["models.xlnet"].append("XLNetTokenizerFast")
_import_structure["tokenization_utils_fast"] = ["PreTrainedTokenizerFast"]
if is_sentencepiece_available():
_import_structure["convert_slow_tokenizer"] = ["SLOW_TO_FAST_CONVERTERS", "convert_slow_tokenizer"]
else:
from .utils import dummy_tokenizers_objects
@@ -349,13 +346,19 @@ else:
name for name in dir(dummy_tokenizers_objects) if not name.startswith("_")
]
if is_sentencepiece_available() and is_tokenizers_available():
_import_structure["convert_slow_tokenizer"] = ["SLOW_TO_FAST_CONVERTERS", "convert_slow_tokenizer"]
else:
from .utils import dummy_sentencepiece_and_tokenizers_objects
_import_structure["utils.dummy_sentencepiece_and_tokenizers_objects"] = [
name for name in dir(dummy_sentencepiece_and_tokenizers_objects) if not name.startswith("_")
]
# Speech-specific objects
if is_speech_available():
_import_structure["models.speech_to_text"].append("Speech2TextFeatureExtractor")
if is_sentencepiece_available():
_import_structure["models.speech_to_text"].append("Speech2TextProcessor")
else:
from .utils import dummy_speech_objects
@@ -363,6 +366,15 @@ else:
name for name in dir(dummy_speech_objects) if not name.startswith("_")
]
if is_sentencepiece_available() and is_speech_available():
_import_structure["models.speech_to_text"].append("Speech2TextProcessor")
else:
from .utils import dummy_sentencepiece_and_speech_objects
_import_structure["utils.dummy_sentencepiece_and_speech_objects"] = [
name for name in dir(dummy_sentencepiece_and_speech_objects) if not name.startswith("_")
]
# Vision-specific objects
if is_vision_available():
_import_structure["image_utils"] = ["ImageFeatureExtractionMixin"]
@@ -1641,21 +1653,25 @@ if TYPE_CHECKING:
from .models.xlnet import XLNetTokenizerFast
from .tokenization_utils_fast import PreTrainedTokenizerFast
if is_sentencepiece_available():
from .convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, convert_slow_tokenizer
else:
from .utils.dummy_tokenizers_objects import *
if is_sentencepiece_available() and is_tokenizers_available():
from .convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, convert_slow_tokenizer
else:
from .utils.dummy_sentencepiece_and_tokenizers_objects import *
if is_speech_available():
from .models.speech_to_text import Speech2TextFeatureExtractor
if is_sentencepiece_available():
from .models.speech_to_text import Speech2TextProcessor
else:
from .utils.dummy_speech_objects import *
if is_speech_available() and is_sentencepiece_available():
from .models.speech_to_text import Speech2TextProcessor
else:
from .utils.dummy_sentencepiece_and_speech_objects import *
if is_vision_available():
from .image_utils import ImageFeatureExtractionMixin
from .models.vit import ViTFeatureExtractor
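These compound guards mean an object that needs two backends is only imported for real when both are installed; otherwise the matching dummy module supplies it and the failure is deferred until the object is used. A minimal sketch of the resulting behavior, assuming a hypothetical environment where speech is installed but sentencepiece is not:

# Hypothetical session: sentencepiece is missing, so Speech2TextProcessor
# resolves to the stub from utils/dummy_sentencepiece_and_speech_objects.py.
from transformers import Speech2TextProcessor

try:
    Speech2TextProcessor()  # stub __init__ runs requires_backends(self, ["sentencepiece", "speech"])
except ImportError as err:
    # The message concatenates the sentencepiece and speech import-error
    # templates, each formatted with the class name "Speech2TextProcessor".
    print(err)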

View File: src/transformers/convert_slow_tokenizer.py

@@ -24,7 +24,7 @@ from typing import Dict, List, Tuple
from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
from tokenizers.models import BPE, Unigram, WordPiece
from .file_utils import requires_protobuf, requires_sentencepiece
from .file_utils import requires_backends
class SentencePieceExtractor:
@@ -33,7 +33,7 @@ class SentencePieceExtractor:
"""
def __init__(self, model: str):
requires_sentencepiece(self)
requires_backends(self, "sentencepiece")
from sentencepiece import SentencePieceProcessor
self.sp = SentencePieceProcessor()
@@ -298,7 +298,7 @@ class RobertaConverter(Converter):
class SpmConverter(Converter):
def __init__(self, *args):
requires_protobuf(self)
requires_backends(self, "protobuf")
super().__init__(*args)

View File: src/transformers/data/metrics/__init__.py

@@ -16,7 +16,7 @@
import warnings
from ...file_utils import is_sklearn_available, requires_sklearn
from ...file_utils import is_sklearn_available, requires_backends
if is_sklearn_available():
@@ -34,13 +34,13 @@ DEPRECATION_WARNING = (
def simple_accuracy(preds, labels):
warnings.warn(DEPRECATION_WARNING, FutureWarning)
requires_sklearn(simple_accuracy)
requires_backends(simple_accuracy, "sklearn")
return (preds == labels).mean()
def acc_and_f1(preds, labels):
warnings.warn(DEPRECATION_WARNING, FutureWarning)
requires_sklearn(acc_and_f1)
requires_backends(acc_and_f1, "sklearn")
acc = simple_accuracy(preds, labels)
f1 = f1_score(y_true=labels, y_pred=preds)
return {
@@ -52,7 +52,7 @@ def acc_and_f1(preds, labels):
def pearson_and_spearman(preds, labels):
warnings.warn(DEPRECATION_WARNING, FutureWarning)
requires_sklearn(pearson_and_spearman)
requires_backends(pearson_and_spearman, "sklearn")
pearson_corr = pearsonr(preds, labels)[0]
spearman_corr = spearmanr(preds, labels)[0]
return {
@@ -64,7 +64,7 @@ def pearson_and_spearman(preds, labels):
def glue_compute_metrics(task_name, preds, labels):
warnings.warn(DEPRECATION_WARNING, FutureWarning)
requires_sklearn(glue_compute_metrics)
requires_backends(glue_compute_metrics, "sklearn")
assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
if task_name == "cola":
return {"mcc": matthews_corrcoef(labels, preds)}
@@ -94,7 +94,7 @@ def glue_compute_metrics(task_name, preds, labels):
def xnli_compute_metrics(task_name, preds, labels):
warnings.warn(DEPRECATION_WARNING, FutureWarning)
requires_sklearn(xnli_compute_metrics)
requires_backends(xnli_compute_metrics, "sklearn")
assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
if task_name == "xnli":
return {"acc": simple_accuracy(preds, labels)}

View File: src/transformers/file_utils.py

@@ -532,82 +532,32 @@ VISION_IMPORT_ERROR = """
"""
def requires_datasets(obj):
BACKENDS_MAPPING = OrderedDict(
[
("datasets", (is_datasets_available, DATASETS_IMPORT_ERROR)),
("faiss", (is_faiss_available, FAISS_IMPORT_ERROR)),
("flax", (is_flax_available, FLAX_IMPORT_ERROR)),
("pandas", (is_pandas_available, PANDAS_IMPORT_ERROR)),
("protobuf", (is_protobuf_available, PROTOBUF_IMPORT_ERROR)),
("scatter", (is_scatter_available, SCATTER_IMPORT_ERROR)),
("sentencepiece", (is_sentencepiece_available, SENTENCEPIECE_IMPORT_ERROR)),
("sklearn", (is_sklearn_available, SKLEARN_IMPORT_ERROR)),
("speech", (is_speech_available, SPEECH_IMPORT_ERROR)),
("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
("tokenziers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)),
("vision", (is_vision_available, VISION_IMPORT_ERROR)),
]
)
def requires_backends(obj, backends):
if not isinstance(backends, (list, tuple)):
backends = [backends]
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_datasets_available():
raise ImportError(DATASETS_IMPORT_ERROR.format(name))
def requires_faiss(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_faiss_available():
raise ImportError(FAISS_IMPORT_ERROR.format(name))
def requires_pytorch(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_torch_available():
raise ImportError(PYTORCH_IMPORT_ERROR.format(name))
def requires_sklearn(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_sklearn_available():
raise ImportError(SKLEARN_IMPORT_ERROR.format(name))
def requires_tf(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_tf_available():
raise ImportError(TENSORFLOW_IMPORT_ERROR.format(name))
def requires_flax(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_flax_available():
raise ImportError(FLAX_IMPORT_ERROR.format(name))
def requires_tokenizers(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_tokenizers_available():
raise ImportError(TOKENIZERS_IMPORT_ERROR.format(name))
def requires_sentencepiece(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_sentencepiece_available():
raise ImportError(SENTENCEPIECE_IMPORT_ERROR.format(name))
def requires_protobuf(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_protobuf_available():
raise ImportError(PROTOBUF_IMPORT_ERROR.format(name))
def requires_pandas(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_pandas_available():
raise ImportError(PANDAS_IMPORT_ERROR.format(name))
def requires_scatter(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_scatter_available():
raise ImportError(SCATTER_IMPORT_ERROR.format(name))
def requires_speech(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_speech_available():
raise ImportError(SPEECH_IMPORT_ERROR.format(name))
def requires_vision(obj):
name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
if not is_vision_available():
raise ImportError(VISION_IMPORT_ERROR.format(name))
if not all(BACKENDS_MAPPING[backend][0]() for backend in backends):
raise ImportError("".join([BACKENDS_MAPPING[backend][1].format(name) for backend in backends]))
def add_start_docstrings(*docstr):
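The BACKENDS_MAPPING table plus this single checker replaces the dozen near-identical requires_xxx helpers deleted above. A minimal usage sketch of the new call style (mirroring the RagRetriever change below; whether it raises depends on the environment):

from transformers.file_utils import requires_backends

class RagRetriever:
    def __init__(self):
        # If any listed backend is unavailable, all(...) is False and an
        # ImportError is raised whose message joins the datasets and faiss
        # templates, each formatted with the class name "RagRetriever".
        requires_backends(self, ["datasets", "faiss"])

# A bare string works too, since requires_backends wraps it in a list:
#     requires_backends(self, "pandas")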

View File: src/transformers/models/rag/retrieval_rag.py

@@ -21,14 +21,7 @@ from typing import Iterable, List, Optional, Tuple
import numpy as np
from ...file_utils import (
cached_path,
is_datasets_available,
is_faiss_available,
is_remote_url,
requires_datasets,
requires_faiss,
)
from ...file_utils import cached_path, is_datasets_available, is_faiss_available, is_remote_url, requires_backends
from ...tokenization_utils_base import BatchEncoding
from ...utils import logging
from .configuration_rag import RagConfig
@@ -372,8 +365,7 @@ class RagRetriever:
def __init__(self, config, question_encoder_tokenizer, generator_tokenizer, index=None, init_retrieval=True):
self._init_retrieval = init_retrieval
requires_datasets(self)
requires_faiss(self)
requires_backends(self, ["datasets", "faiss"])
super().__init__()
self.index = index or self._build_index(config)
self.generator_tokenizer = generator_tokenizer
@@ -411,8 +403,7 @@ class RagRetriever:
@classmethod
def from_pretrained(cls, retriever_name_or_path, indexed_dataset=None, **kwargs):
requires_datasets(cls)
requires_faiss(cls)
requires_backends(cls, ["datasets", "faiss"])
config = kwargs.pop("config", None) or RagConfig.from_pretrained(retriever_name_or_path, **kwargs)
rag_tokenizer = RagTokenizer.from_pretrained(retriever_name_or_path, config=config)
question_encoder_tokenizer = rag_tokenizer.question_encoder

View File: src/transformers/models/tapas/modeling_tapas.py

@@ -33,7 +33,7 @@ from ...file_utils import (
add_start_docstrings_to_model_forward,
is_scatter_available,
replace_return_docstrings,
requires_scatter,
requires_backends,
)
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, MaskedLMOutput, SequenceClassifierOutput
from ...modeling_utils import (
@@ -792,7 +792,7 @@ class TapasModel(TapasPreTrainedModel):
"""
def __init__(self, config, add_pooling_layer=True):
requires_scatter(self)
requires_backends(self, "scatter")
super().__init__(config)
self.config = config

View File: src/transformers/pipelines/table_question_answering.py

@@ -2,7 +2,7 @@ import collections
import numpy as np
from ..file_utils import add_end_docstrings, is_torch_available, requires_pandas
from ..file_utils import add_end_docstrings, is_torch_available, requires_backends
from .base import PIPELINE_INIT_ARGS, ArgumentHandler, Pipeline, PipelineException
@@ -24,7 +24,7 @@ class TableQuestionAnsweringArgumentHandler(ArgumentHandler):
# ...,
# {"table": pd.DataFrame, "query" : List[str]}
# ]
requires_pandas(self)
requires_backends(self, "pandas")
import pandas as pd
if table is None:

View File: src/transformers/utils/dummy_flax_objects.py

@@ -1,14 +1,14 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_flax
from ..file_utils import requires_backends
class FlaxPreTrainedModel:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
FLAX_MODEL_FOR_MASKED_LM_MAPPING = None
@@ -37,153 +37,153 @@ FLAX_MODEL_MAPPING = None
class FlaxAutoModel:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForMaskedLM:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForMultipleChoice:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForNextSentencePrediction:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForPreTraining:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForQuestionAnswering:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForSequenceClassification:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxAutoModelForTokenClassification:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForMaskedLM:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForMultipleChoice:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForNextSentencePrediction:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForPreTraining:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForQuestionAnswering:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForSequenceClassification:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertForTokenClassification:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertModel:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxBertPreTrainedModel:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
class FlaxRobertaModel:
def __init__(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_flax(self)
requires_backends(self, ["flax"])

File diff suppressed because it is too large

View File: src/transformers/utils/dummy_sentencepiece_and_speech_objects.py

@@ -0,0 +1,7 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_backends
class Speech2TextProcessor:
def __init__(self, *args, **kwargs):
requires_backends(self, ["sentencepiece", "speech"])

View File: src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py

@@ -0,0 +1,9 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_backends
SLOW_TO_FAST_CONVERTERS = None
def convert_slow_tokenizer(*args, **kwargs):
requires_backends(convert_slow_tokenizer, ["sentencepiece", "tokenizers"])

View File: src/transformers/utils/dummy_sentencepiece_objects.py

@@ -1,155 +1,155 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_sentencepiece
from ..file_utils import requires_backends
class AlbertTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class BarthezTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class BertGenerationTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class CamembertTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class DebertaV2Tokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class M2M100Tokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class MarianTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class MBart50Tokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class MBartTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class MT5Tokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class PegasusTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class ReformerTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class Speech2TextTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class T5Tokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class XLMProphetNetTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class XLMRobertaTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
class XLNetTokenizer:
def __init__(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_sentencepiece(self)
requires_backends(self, ["sentencepiece"])

View File: src/transformers/utils/dummy_speech_objects.py

@@ -1,12 +1,7 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_speech
from ..file_utils import requires_backends
class Speech2TextFeatureExtractor:
def __init__(self, *args, **kwargs):
requires_speech(self)
class Speech2TextProcessor:
def __init__(self, *args, **kwargs):
requires_speech(self)
requires_backends(self, ["speech"])

File diff suppressed because it is too large

View File: src/transformers/utils/dummy_tokenizers_objects.py

@@ -1,306 +1,299 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_tokenizers
from ..file_utils import requires_backends
class AlbertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class BartTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class BarthezTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class BertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class CamembertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class ConvBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class DistilBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class DPRContextEncoderTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class DPRQuestionEncoderTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class DPRReaderTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class ElectraTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class FunnelTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class GPT2TokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class HerbertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class LayoutLMTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class LEDTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class LongformerTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class LxmertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class MBart50TokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class MBartTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class MobileBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class MPNetTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class MT5TokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class OpenAIGPTTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class PegasusTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class ReformerTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class RetriBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class RobertaTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class SqueezeBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class T5TokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class XLMRobertaTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class XLNetTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
class PreTrainedTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
SLOW_TO_FAST_CONVERTERS = None
def convert_slow_tokenizer(*args, **kwargs):
requires_tokenizers(convert_slow_tokenizer)
requires_backends(self, ["tokenizers"])

View File: src/transformers/utils/dummy_vision_objects.py

@@ -1,12 +1,12 @@
# This file is autogenerated by the command `make fix-copies`, do not edit.
from ..file_utils import requires_vision
from ..file_utils import requires_backends
class ImageFeatureExtractionMixin:
def __init__(self, *args, **kwargs):
requires_vision(self)
requires_backends(self, ["vision"])
class ViTFeatureExtractor:
def __init__(self, *args, **kwargs):
requires_vision(self)
requires_backends(self, ["vision"])

View File: utils/check_dummies.py

@@ -22,11 +22,11 @@ import re
# python utils/check_dummies.py
PATH_TO_TRANSFORMERS = "src/transformers"
# Matches is_xxx_available()
_re_backend = re.compile(r"is\_([a-z]*)_available()")
# Matches from xxx import bla
_re_single_line_import = re.compile(r"\s+from\s+\S*\s+import\s+([^\(\s].*)\n")
_re_test_backend = re.compile(r"^\s+if\s+is\_([a-z]*)\_available\(\):\s*$")
BACKENDS = ["torch", "tf", "flax", "sentencepiece", "speech", "tokenizers", "vision"]
_re_test_backend = re.compile(r"^\s+if\s+is\_[a-z]*\_available\(\)")
DUMMY_CONSTANT = """
@@ -36,25 +36,34 @@ DUMMY_CONSTANT = """
DUMMY_PRETRAINED_CLASS = """
class {0}:
def __init__(self, *args, **kwargs):
requires_{1}(self)
requires_backends(self, {1})
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_{1}(self)
requires_backends(self, {1})
"""
DUMMY_CLASS = """
class {0}:
def __init__(self, *args, **kwargs):
requires_{1}(self)
requires_backends(self, {1})
"""
DUMMY_FUNCTION = """
def {0}(*args, **kwargs):
requires_{1}({0})
requires_backends({0}, {1})
"""
def find_backend(line):
"""Find one (or multiple) backend in a code line of the init."""
if _re_test_backend.search(line) is None:
return None
backends = [b[0] for b in _re_backend.findall(line)]
backends.sort()
return "_and_".join(backends)
def read_init():
""" Read the init and extracts PyTorch, TensorFlow, SentencePiece and Tokenizers objects. """
with open(os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), "r", encoding="utf-8", newline="\n") as f:
@@ -69,14 +78,10 @@ def read_init():
# Go through the end of the file
while line_index < len(lines):
# If the line is an if is_backend_available, we grab all objects associated.
if _re_test_backend.search(lines[line_index]) is not None:
backend = _re_test_backend.search(lines[line_index]).groups()[0]
backend = find_backend(lines[line_index])
if backend is not None:
line_index += 1
# Ignore if backend isn't tracked for dummies.
if backend not in BACKENDS:
continue
objects = []
# Until we unindent, add backend objects to the list
while len(lines[line_index]) <= 1 or lines[line_index].startswith(" " * 8):
@@ -128,13 +133,12 @@ def create_dummy_files():
""" Create the content of the dummy files. """
backend_specific_objects = read_init()
# For special correspondence backend to module name as used in the function requires_modulename
module_names = {"torch": "pytorch"}
dummy_files = {}
for backend, objects in backend_specific_objects.items():
backend_name = module_names.get(backend, backend)
backend_name = "[" + ", ".join(f'"{b}"' for b in backend.split("_and_")) + "]"
dummy_file = "# This file is autogenerated by the command `make fix-copies`, do not edit.\n"
dummy_file += f"from ..file_utils import requires_{backend_name}\n\n"
dummy_file += "from ..file_utils import requires_backends\n\n"
dummy_file += "\n".join([create_dummy_object(o, backend_name) for o in objects])
dummy_files[backend] = dummy_file
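The key-to-argument conversion is pure string manipulation: a compound key expands back into the list literal that appears in the generated calls. For example:

backend = "sentencepiece_and_speech"
backend_name = "[" + ", ".join(f'"{b}"' for b in backend.split("_and_")) + "]"
assert backend_name == '["sentencepiece", "speech"]'
# Substituted into the templates, this produces calls such as
# requires_backends(self, ["sentencepiece", "speech"])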
@@ -156,8 +160,11 @@ def check_dummies(overwrite=False):
actual_dummies = {}
for backend, file_path in dummy_file_paths.items():
with open(file_path, "r", encoding="utf-8", newline="\n") as f:
actual_dummies[backend] = f.read()
if os.path.isfile(file_path):
with open(file_path, "r", encoding="utf-8", newline="\n") as f:
actual_dummies[backend] = f.read()
else:
actual_dummies[backend] = ""
for backend in dummy_files.keys():
if dummy_files[backend] != actual_dummies[backend]:

View File: utils/check_inits.py

@@ -18,12 +18,14 @@ import re
PATH_TO_TRANSFORMERS = "src/transformers"
BACKENDS = ["torch", "tf", "flax", "sentencepiece", "speech", "tokenizers", "vision"]
# Matches is_xxx_available()
_re_backend = re.compile(r"is\_([a-z]*)_available()")
# Catches a line with a key-values pattern: "bla": ["foo", "bar"]
_re_import_struct_key_value = re.compile(r'\s+"\S*":\s+\[([^\]]*)\]')
# Catches a line if is_foo_available
_re_test_backend = re.compile(r"^\s*if\s+is\_([a-z]*)\_available\(\):\s*$")
_re_test_backend = re.compile(r"^\s*if\s+is\_[a-z]*\_available\(\)")
# Catches a line _import_struct["bla"].append("foo")
_re_import_struct_add_one = re.compile(r'^\s*_import_structure\["\S*"\]\.append\("(\S*)"\)')
# Catches a line _import_struct["bla"].extend(["foo", "bar"]) or _import_struct["bla"] = ["foo", "bar"]
@@ -36,6 +38,15 @@ _re_between_brackets = re.compile("^\s+\[([^\]]+)\]")
_re_import = re.compile(r"\s+from\s+\S*\s+import\s+([^\(\s].*)\n")
def find_backend(line):
"""Find one (or multiple) backend in a code line of the init."""
if _re_test_backend.search(line) is None:
return None
backends = [b[0] for b in _re_backend.findall(line)]
backends.sort()
return "_and_".join(backends)
def parse_init(init_file):
"""
Read an init_file and parse (per backend) the _import_structure objects defined and the TYPE_CHECKING objects
@@ -54,7 +65,7 @@ def parse_init(init_file):
# First grab the objects without a specific backend in _import_structure
objects = []
while not lines[line_index].startswith("if TYPE_CHECKING") and _re_test_backend.search(lines[line_index]) is None:
while not lines[line_index].startswith("if TYPE_CHECKING") and find_backend(lines[line_index]) is None:
line = lines[line_index]
single_line_import_search = _re_import_struct_key_value.search(line)
if single_line_import_search is not None:
@@ -68,14 +79,10 @@
# Let's continue with backend-specific objects in _import_structure
while not lines[line_index].startswith("if TYPE_CHECKING"):
# If the line is an if is_backend_available, we grab all objects associated.
if _re_test_backend.search(lines[line_index]) is not None:
backend = _re_test_backend.search(lines[line_index]).groups()[0]
backend = find_backend(lines[line_index])
if backend is not None:
line_index += 1
# Ignore if backend isn't tracked for dummies.
if backend not in BACKENDS:
continue
objects = []
# Until we unindent, add backend objects to the list
while len(lines[line_index]) <= 1 or lines[line_index].startswith(" " * 4):
@@ -106,7 +113,7 @@ def parse_init(init_file):
objects = []
while (
line_index < len(lines)
and _re_test_backend.search(lines[line_index]) is None
and find_backend(lines[line_index]) is None
and not lines[line_index].startswith("else")
):
line = lines[line_index]
@@ -121,14 +128,10 @@ def parse_init(init_file):
# Let's continue with backend-specific objects
while line_index < len(lines):
# If the line is an if is_backend_available, we grab all objects associated.
if _re_test_backend.search(lines[line_index]) is not None:
backend = _re_test_backend.search(lines[line_index]).groups()[0]
backend = find_backend(lines[line_index])
if backend is not None:
line_index += 1
# Ignore if backend isn't tracked for dummies.
if backend not in BACKENDS:
continue
objects = []
# Until we unindent, add backend objects to the list
while len(lines[line_index]) <= 1 or lines[line_index].startswith(" " * 8):