S2T + M2M100 should be available in tokenization_auto (#10657)

* S2T + M2M100 should be available in tokenization_auto

* Requires sentencepiece

* SentencePiece for S2T as well :)
This commit is contained in:
Lysandre Debut 2021-03-11 09:53:36 -05:00 committed by GitHub
parent 602d63f05c
commit 6d9e11a193
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -80,6 +80,7 @@ from .configuration_auto import (
LEDConfig,
LongformerConfig,
LxmertConfig,
M2M100Config,
MarianConfig,
MBartConfig,
MobileBertConfig,
@ -92,6 +93,7 @@ from .configuration_auto import (
ReformerConfig,
RetriBertConfig,
RobertaConfig,
Speech2TextConfig,
SqueezeBertConfig,
T5Config,
TapasConfig,
@ -111,11 +113,13 @@ if is_sentencepiece_available():
from ..bert_generation.tokenization_bert_generation import BertGenerationTokenizer
from ..camembert.tokenization_camembert import CamembertTokenizer
from ..deberta_v2.tokenization_deberta_v2 import DebertaV2Tokenizer
from ..m2m_100 import M2M100Tokenizer
from ..marian.tokenization_marian import MarianTokenizer
from ..mbart.tokenization_mbart import MBartTokenizer
from ..mt5 import MT5Tokenizer
from ..pegasus.tokenization_pegasus import PegasusTokenizer
from ..reformer.tokenization_reformer import ReformerTokenizer
from ..speech_to_text import Speech2TextTokenizer
from ..t5.tokenization_t5 import T5Tokenizer
from ..xlm_prophetnet.tokenization_xlm_prophetnet import XLMProphetNetTokenizer
from ..xlm_roberta.tokenization_xlm_roberta import XLMRobertaTokenizer
@ -135,6 +139,8 @@ else:
XLMRobertaTokenizer = None
XLNetTokenizer = None
XLMProphetNetTokenizer = None
M2M100Tokenizer = None
Speech2TextTokenizer = None
if is_tokenizers_available():
from ..albert.tokenization_albert_fast import AlbertTokenizerFast
@ -197,6 +203,7 @@ else:
XLMRobertaTokenizerFast = None
XLNetTokenizerFast = None
logger = logging.get_logger(__name__)
@ -240,6 +247,8 @@ TOKENIZER_MAPPING = OrderedDict(
(DebertaV2Config, (DebertaV2Tokenizer, None)),
(RagConfig, (RagTokenizer, None)),
(XLMProphetNetConfig, (XLMProphetNetTokenizer, None)),
(Speech2TextConfig, (Speech2TextTokenizer, None)),
(M2M100Config, (M2M100Tokenizer, None)),
(ProphetNetConfig, (ProphetNetTokenizer, None)),
(MPNetConfig, (MPNetTokenizer, MPNetTokenizerFast)),
(TapasConfig, (TapasTokenizer, None)),