Improve type checker performance (#13094)

* Conditionally declare `TOKENIZER_MAPPING_NAMES` within an `if TYPE_CHECKING` block so that type checkers don't need to evaluate the RHS of the assignment.

This improves performance of the Pylance/Pyright type checkers (a minimal sketch of the pattern follows the commit message).

* Update src/transformers/models/auto/tokenization_auto.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Add missing import

* Format

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
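
For readers unfamiliar with the trick: `typing.TYPE_CHECKING` is `False` at runtime but assumed `True` by static analyzers, so each side only ever sees its own branch. Below is a minimal sketch of the pattern, with a hypothetical `slow_availability_check()` standing in for the repeated `is_tokenizers_available()` / `is_sentencepiece_available()` calls:

from collections import OrderedDict
from typing import TYPE_CHECKING, Optional, Tuple

def slow_availability_check() -> bool:
    # Hypothetical stand-in for is_tokenizers_available() /
    # is_sentencepiece_available(), which a type checker would
    # otherwise have to evaluate for every entry in the mapping.
    return True

if TYPE_CHECKING:
    # Static analyzers take this branch: a cheap, fully annotated
    # declaration with no conditional expressions to evaluate.
    MAPPING: OrderedDict[str, Tuple[Optional[str], Optional[str]]] = OrderedDict()
else:
    # Runtime takes this branch (TYPE_CHECKING is False), so the
    # real mapping is built exactly as before.
    MAPPING = OrderedDict(
        [
            ("bert", ("BertTokenizer", "BertTokenizerFast" if slow_availability_check() else None)),
        ]
    )

Nothing downstream changes: consumers such as `_LazyAutoMapping` read the mapping at runtime, where the populated branch is the one that actually executed.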
Bill Schnurr 2021-08-12 09:45:54 -07:00 committed by GitHub
parent b9962b8656
commit e46ad22cd6

@@ -18,7 +18,7 @@ import importlib
 import json
 import os
 from collections import OrderedDict
-from typing import Dict, Optional, Union
+from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
 
 from ...configuration_utils import PretrainedConfig
 from ...file_utils import (
@@ -43,153 +43,163 @@ from .configuration_auto import (
 logger = logging.get_logger(__name__)
 
-TOKENIZER_MAPPING_NAMES = OrderedDict(
-    [
-        ("retribert", ("RetriBertTokenizer", "RetriBertTokenizerFast" if is_tokenizers_available() else None)),
-        ("roformer", ("RoFormerTokenizer", "RoFormerTokenizerFast" if is_tokenizers_available() else None)),
-        (
-            "t5",
-            (
-                "T5Tokenizer" if is_sentencepiece_available() else None,
-                "T5TokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        (
-            "mt5",
-            (
-                "MT5Tokenizer" if is_sentencepiece_available() else None,
-                "MT5TokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        ("mobilebert", ("MobileBertTokenizer", "MobileBertTokenizerFast" if is_tokenizers_available() else None)),
-        ("distilbert", ("DistilBertTokenizer", "DistilBertTokenizerFast" if is_tokenizers_available() else None)),
-        (
-            "albert",
-            (
-                "AlbertTokenizer" if is_sentencepiece_available() else None,
-                "AlbertTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        (
-            "camembert",
-            (
-                "CamembertTokenizer" if is_sentencepiece_available() else None,
-                "CamembertTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        (
-            "pegasus",
-            (
-                "PegasusTokenizer" if is_sentencepiece_available() else None,
-                "PegasusTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        (
-            "mbart",
-            (
-                "MBartTokenizer" if is_sentencepiece_available() else None,
-                "MBartTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        (
-            "xlm-roberta",
-            (
-                "XLMRobertaTokenizer" if is_sentencepiece_available() else None,
-                "XLMRobertaTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        ("marian", ("MarianTokenizer" if is_sentencepiece_available() else None, None)),
-        ("blenderbot-small", ("BlenderbotSmallTokenizer", None)),
-        ("blenderbot", ("BlenderbotTokenizer", None)),
-        ("bart", ("BartTokenizer", "BartTokenizerFast")),
-        ("longformer", ("LongformerTokenizer", "LongformerTokenizerFast" if is_tokenizers_available() else None)),
-        ("roberta", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
-        (
-            "reformer",
-            (
-                "ReformerTokenizer" if is_sentencepiece_available() else None,
-                "ReformerTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        ("electra", ("ElectraTokenizer", "ElectraTokenizerFast" if is_tokenizers_available() else None)),
-        ("funnel", ("FunnelTokenizer", "FunnelTokenizerFast" if is_tokenizers_available() else None)),
-        ("lxmert", ("LxmertTokenizer", "LxmertTokenizerFast" if is_tokenizers_available() else None)),
-        ("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)),
-        (
-            "dpr",
-            ("DPRQuestionEncoderTokenizer", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None),
-        ),
-        ("squeezebert", ("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None)),
-        ("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
-        ("openai-gpt", ("OpenAIGPTTokenizer", "OpenAIGPTTokenizerFast" if is_tokenizers_available() else None)),
-        ("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
-        ("transfo-xl", ("TransfoXLTokenizer", None)),
-        (
-            "xlnet",
-            (
-                "XLNetTokenizer" if is_sentencepiece_available() else None,
-                "XLNetTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        ("flaubert", ("FlaubertTokenizer", None)),
-        ("xlm", ("XLMTokenizer", None)),
-        ("ctrl", ("CTRLTokenizer", None)),
-        ("fsmt", ("FSMTTokenizer", None)),
-        ("bert-generation", ("BertGenerationTokenizer" if is_sentencepiece_available() else None, None)),
-        ("deberta", ("DebertaTokenizer", "DebertaTokenizerFast" if is_tokenizers_available() else None)),
-        ("deberta-v2", ("DebertaV2Tokenizer" if is_sentencepiece_available() else None, None)),
-        ("rag", ("RagTokenizer", None)),
-        ("xlm-prophetnet", ("XLMProphetNetTokenizer" if is_sentencepiece_available() else None, None)),
-        ("speech_to_text", ("Speech2TextTokenizer" if is_sentencepiece_available() else None, None)),
-        ("m2m_100", ("M2M100Tokenizer" if is_sentencepiece_available() else None, None)),
-        ("prophetnet", ("ProphetNetTokenizer", None)),
-        ("mpnet", ("MPNetTokenizer", "MPNetTokenizerFast" if is_tokenizers_available() else None)),
-        ("tapas", ("TapasTokenizer", None)),
-        ("led", ("LEDTokenizer", "LEDTokenizerFast" if is_tokenizers_available() else None)),
-        ("convbert", ("ConvBertTokenizer", "ConvBertTokenizerFast" if is_tokenizers_available() else None)),
-        (
-            "big_bird",
-            (
-                "BigBirdTokenizer" if is_sentencepiece_available() else None,
-                "BigBirdTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        ("ibert", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
-        ("wav2vec2", ("Wav2Vec2CTCTokenizer", None)),
-        ("hubert", ("Wav2Vec2CTCTokenizer", None)),
-        ("gpt_neo", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
-        ("luke", ("LukeTokenizer", None)),
-        ("bigbird_pegasus", ("PegasusTokenizer", "PegasusTokenizerFast" if is_tokenizers_available() else None)),
-        ("canine", ("CanineTokenizer", None)),
-        ("bertweet", ("BertweetTokenizer", None)),
-        ("bert-japanese", ("BertJapaneseTokenizer", None)),
-        ("byt5", ("ByT5Tokenizer", None)),
-        (
-            "cpm",
-            (
-                "CpmTokenizer" if is_sentencepiece_available() else None,
-                "CpmTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        ("herbert", ("HerbertTokenizer", "HerbertTokenizerFast" if is_tokenizers_available() else None)),
-        ("phobert", ("PhobertTokenizer", None)),
-        (
-            "barthez",
-            (
-                "BarthezTokenizer" if is_sentencepiece_available() else None,
-                "BarthezTokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-        (
-            "mbart50",
-            (
-                "MBart50Tokenizer" if is_sentencepiece_available() else None,
-                "MBart50TokenizerFast" if is_tokenizers_available() else None,
-            ),
-        ),
-    ]
-)
+if TYPE_CHECKING:
+    # This significantly improves completion suggestion performance when
+    # the transformers package is used with Microsoft's Pylance language server.
+    TOKENIZER_MAPPING_NAMES: OrderedDict[str, Tuple[Optional[str], Optional[str]]] = OrderedDict()
+else:
+    TOKENIZER_MAPPING_NAMES = OrderedDict(
+        [
+            ("retribert", ("RetriBertTokenizer", "RetriBertTokenizerFast" if is_tokenizers_available() else None)),
+            ("roformer", ("RoFormerTokenizer", "RoFormerTokenizerFast" if is_tokenizers_available() else None)),
+            (
+                "t5",
+                (
+                    "T5Tokenizer" if is_sentencepiece_available() else None,
+                    "T5TokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "mt5",
+                (
+                    "MT5Tokenizer" if is_sentencepiece_available() else None,
+                    "MT5TokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            ("mobilebert", ("MobileBertTokenizer", "MobileBertTokenizerFast" if is_tokenizers_available() else None)),
+            ("distilbert", ("DistilBertTokenizer", "DistilBertTokenizerFast" if is_tokenizers_available() else None)),
+            (
+                "albert",
+                (
+                    "AlbertTokenizer" if is_sentencepiece_available() else None,
+                    "AlbertTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "camembert",
+                (
+                    "CamembertTokenizer" if is_sentencepiece_available() else None,
+                    "CamembertTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "pegasus",
+                (
+                    "PegasusTokenizer" if is_sentencepiece_available() else None,
+                    "PegasusTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "mbart",
+                (
+                    "MBartTokenizer" if is_sentencepiece_available() else None,
+                    "MBartTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "xlm-roberta",
+                (
+                    "XLMRobertaTokenizer" if is_sentencepiece_available() else None,
+                    "XLMRobertaTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            ("marian", ("MarianTokenizer" if is_sentencepiece_available() else None, None)),
+            ("blenderbot-small", ("BlenderbotSmallTokenizer", None)),
+            ("blenderbot", ("BlenderbotTokenizer", None)),
+            ("bart", ("BartTokenizer", "BartTokenizerFast")),
+            ("longformer", ("LongformerTokenizer", "LongformerTokenizerFast" if is_tokenizers_available() else None)),
+            ("roberta", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
+            (
+                "reformer",
+                (
+                    "ReformerTokenizer" if is_sentencepiece_available() else None,
+                    "ReformerTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            ("electra", ("ElectraTokenizer", "ElectraTokenizerFast" if is_tokenizers_available() else None)),
+            ("funnel", ("FunnelTokenizer", "FunnelTokenizerFast" if is_tokenizers_available() else None)),
+            ("lxmert", ("LxmertTokenizer", "LxmertTokenizerFast" if is_tokenizers_available() else None)),
+            ("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)),
+            (
+                "dpr",
+                (
+                    "DPRQuestionEncoderTokenizer",
+                    "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "squeezebert",
+                ("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None),
+            ),
+            ("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
+            ("openai-gpt", ("OpenAIGPTTokenizer", "OpenAIGPTTokenizerFast" if is_tokenizers_available() else None)),
+            ("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
+            ("transfo-xl", ("TransfoXLTokenizer", None)),
+            (
+                "xlnet",
+                (
+                    "XLNetTokenizer" if is_sentencepiece_available() else None,
+                    "XLNetTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            ("flaubert", ("FlaubertTokenizer", None)),
+            ("xlm", ("XLMTokenizer", None)),
+            ("ctrl", ("CTRLTokenizer", None)),
+            ("fsmt", ("FSMTTokenizer", None)),
+            ("bert-generation", ("BertGenerationTokenizer" if is_sentencepiece_available() else None, None)),
+            ("deberta", ("DebertaTokenizer", "DebertaTokenizerFast" if is_tokenizers_available() else None)),
+            ("deberta-v2", ("DebertaV2Tokenizer" if is_sentencepiece_available() else None, None)),
+            ("rag", ("RagTokenizer", None)),
+            ("xlm-prophetnet", ("XLMProphetNetTokenizer" if is_sentencepiece_available() else None, None)),
+            ("speech_to_text", ("Speech2TextTokenizer" if is_sentencepiece_available() else None, None)),
+            ("m2m_100", ("M2M100Tokenizer" if is_sentencepiece_available() else None, None)),
+            ("prophetnet", ("ProphetNetTokenizer", None)),
+            ("mpnet", ("MPNetTokenizer", "MPNetTokenizerFast" if is_tokenizers_available() else None)),
+            ("tapas", ("TapasTokenizer", None)),
+            ("led", ("LEDTokenizer", "LEDTokenizerFast" if is_tokenizers_available() else None)),
+            ("convbert", ("ConvBertTokenizer", "ConvBertTokenizerFast" if is_tokenizers_available() else None)),
+            (
+                "big_bird",
+                (
+                    "BigBirdTokenizer" if is_sentencepiece_available() else None,
+                    "BigBirdTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            ("ibert", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)),
+            ("wav2vec2", ("Wav2Vec2CTCTokenizer", None)),
+            ("hubert", ("Wav2Vec2CTCTokenizer", None)),
+            ("gpt_neo", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
+            ("luke", ("LukeTokenizer", None)),
+            ("bigbird_pegasus", ("PegasusTokenizer", "PegasusTokenizerFast" if is_tokenizers_available() else None)),
+            ("canine", ("CanineTokenizer", None)),
+            ("bertweet", ("BertweetTokenizer", None)),
+            ("bert-japanese", ("BertJapaneseTokenizer", None)),
+            ("byt5", ("ByT5Tokenizer", None)),
+            (
+                "cpm",
+                (
+                    "CpmTokenizer" if is_sentencepiece_available() else None,
+                    "CpmTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            ("herbert", ("HerbertTokenizer", "HerbertTokenizerFast" if is_tokenizers_available() else None)),
+            ("phobert", ("PhobertTokenizer", None)),
+            (
+                "barthez",
+                (
+                    "BarthezTokenizer" if is_sentencepiece_available() else None,
+                    "BarthezTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "mbart50",
+                (
+                    "MBart50Tokenizer" if is_sentencepiece_available() else None,
+                    "MBart50TokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+        ]
+    )
 
 TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES)
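
A quick way to observe both views of the module (the file name and invocation are illustrative, not part of the commit; `reveal_type` is interpreted statically by Pyright/Pylance and is guarded so the script still runs):

# probe.py -- illustrative check, assuming transformers and pyright are installed
from typing import TYPE_CHECKING

from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES

if TYPE_CHECKING:
    # Pyright resolves this to the annotated OrderedDict from the
    # TYPE_CHECKING branch, without evaluating the large conditional literal.
    reveal_type(TOKENIZER_MAPPING_NAMES)

# At runtime the else-branch ran, so the mapping is fully populated.
print(len(TOKENIZER_MAPPING_NAMES))
print(TOKENIZER_MAPPING_NAMES["bert"])

Running `pyright probe.py` surfaces the revealed type, while `python probe.py` prints the populated entries.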