Place BigBirdTokenizer in sentencepiece-only objects (#12975)

Author: Sylvain Gugger (committed by GitHub)
Date:   2021-08-02 08:26:38 +02:00
Parent: b5995badc9
Commit: c1a65385a1
2 changed files with 13 additions and 2 deletions
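BigBird's slow tokenizer is built on a SentencePiece model, so it should only be exposed when the sentencepiece backend is installed; otherwise it has to be served by a dummy placeholder that fails with an actionable message rather than a raw import error. Below is a minimal, self-contained sketch of that backend-check pattern; is_sentencepiece_available and requires_backends here are simplified stand-ins for the helpers transformers actually uses, not the library code itself.

# Minimal sketch of the backend-check machinery (simplified stand-ins, not the
# actual transformers implementation).
import importlib.util


def is_sentencepiece_available():
    # True only if the sentencepiece package can be imported in this environment.
    return importlib.util.find_spec("sentencepiece") is not None


def requires_backends(obj, backends):
    # Raise an informative error when a required backend is missing, instead of
    # a bare ModuleNotFoundError deep inside the tokenizer code.
    name = obj.__name__ if isinstance(obj, type) else type(obj).__name__
    if "sentencepiece" in backends and not is_sentencepiece_available():
        raise ImportError(f"{name} requires the SentencePiece library: pip install sentencepiece")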

src/transformers/__init__.py

@@ -157,7 +157,7 @@ _import_structure = {
     "models.bert_generation": ["BertGenerationConfig"],
     "models.bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"],
     "models.bertweet": ["BertweetTokenizer"],
-    "models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig", "BigBirdTokenizer"],
+    "models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig"],
     "models.bigbird_pegasus": [
         "BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP",
         "BigBirdPegasusConfig",
@@ -308,6 +308,7 @@ if is_sentencepiece_available():
     _import_structure["models.albert"].append("AlbertTokenizer")
     _import_structure["models.barthez"].append("BarthezTokenizer")
     _import_structure["models.bert_generation"].append("BertGenerationTokenizer")
+    _import_structure["models.big_bird"].append("BigBirdTokenizer")
     _import_structure["models.camembert"].append("CamembertTokenizer")
     _import_structure["models.deberta_v2"].append("DebertaV2Tokenizer")
     _import_structure["models.m2m_100"].append("M2M100Tokenizer")
@@ -1822,7 +1823,7 @@ if TYPE_CHECKING:
     from .models.bert_generation import BertGenerationConfig
     from .models.bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer
     from .models.bertweet import BertweetTokenizer
-    from .models.big_bird import BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdConfig, BigBirdTokenizer
+    from .models.big_bird import BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdConfig
     from .models.bigbird_pegasus import BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdPegasusConfig
     from .models.blenderbot import BLENDERBOT_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotConfig, BlenderbotTokenizer
     from .models.blenderbot_small import (
@@ -1971,6 +1972,7 @@ if TYPE_CHECKING:
         from .models.albert import AlbertTokenizer
         from .models.barthez import BarthezTokenizer
         from .models.bert_generation import BertGenerationTokenizer
+        from .models.big_bird import BigBirdTokenizer
         from .models.camembert import CamembertTokenizer
         from .models.deberta_v2 import DebertaV2Tokenizer
         from .models.m2m_100 import M2M100Tokenizer
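The hunks above touch the package's top-level __init__: the first two drop "BigBirdTokenizer" from the unconditional export entry (only the config objects stay there) and append it inside the is_sentencepiece_available() branch instead, while the last two mirror the same split under if TYPE_CHECKING: for static type checkers. A simplified, self-contained sketch of that gating (illustrative only; the real file drives a lazy module loader with a table like this):

# Simplified sketch of the conditional export table (illustrative only; not the
# real transformers __init__.py machinery).
import importlib.util

_import_structure = {
    # Config objects never need SentencePiece, so they stay unconditional.
    "models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig"],
}

if importlib.util.find_spec("sentencepiece") is not None:
    # The slow tokenizer is exported only when its backend is installed.
    _import_structure["models.big_bird"].append("BigBirdTokenizer")
else:
    # Otherwise the name is served by the dummy module that the second file of
    # this diff extends: the import still resolves, but the object errors on use.
    _import_structure["utils.dummy_sentencepiece_objects"] = ["BigBirdTokenizer"]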

src/transformers/utils/dummy_sentencepiece_objects.py

@@ -29,6 +29,15 @@ class BertGenerationTokenizer:
         requires_backends(cls, ["sentencepiece"])
 
 
+class BigBirdTokenizer:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["sentencepiece"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["sentencepiece"])
+
+
 class CamembertTokenizer:
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["sentencepiece"])
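With the dummy class registered, from transformers import BigBirdTokenizer keeps working in an environment that lacks sentencepiece; the failure is deferred to first use and points at the missing backend. A sketch of the expected interaction after this change (the exact error type and wording come from the requires_backends helper and may differ between versions):

# Sketch of the behaviour in an environment *without* sentencepiece installed.
from transformers import BigBirdTokenizer  # resolves to the dummy class; the import succeeds

try:
    # google/bigbird-roberta-base is a public BigBird checkpoint on the Hub.
    tokenizer = BigBirdTokenizer.from_pretrained("google/bigbird-roberta-base")
except ImportError as err:
    # The dummy's requires_backends call raises here, telling the user which
    # backend to install.
    print(err)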