FastPreTrainedTokenizer => PreTrainedTokenizerFast

commit 7ead04ce14
parent 1f82a5d910
Author: Anthony MOI
Date:   2019-12-26 14:39:39 -05:00
3 changed files with 6 additions and 6 deletions
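
In short, this is a pure rename: FastPreTrainedTokenizer becomes PreTrainedTokenizerFast, and the two subclasses in the diff update their base class accordingly. A minimal sketch of what a downstream subclass looks like after the rename (MyTokenizerFast and its vocab mapping are hypothetical, not part of this commit):

    # Hypothetical subclass following the renamed base class.
    # Only the base-class name changes relative to the old code;
    # the constructor chain is unchanged.
    from transformers.tokenization_utils import PreTrainedTokenizerFast

    class MyTokenizerFast(PreTrainedTokenizerFast):
        vocab_files_names = {"vocab_file": "vocab.txt"}  # illustrative mapping

        def __init__(self, **kwargs):
            # Two-argument super matches the style used in this codebase.
            super(MyTokenizerFast, self).__init__(**kwargs)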

transformers/tokenization_bert.py

@@ -22,7 +22,7 @@ import unicodedata
 import tokenizers as tk
 
-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer
 
 logger = logging.getLogger(__name__)
@@ -529,7 +529,7 @@ def _is_punctuation(char):
     return False
 
 
-class BertTokenizerFast(FastPreTrainedTokenizer):
+class BertTokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION

transformers/tokenization_gpt2.py

@@ -23,7 +23,7 @@ from functools import lru_cache
 import regex as re
 import tokenizers as tk
 
-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer
 
 logger = logging.getLogger(__name__)
@@ -249,7 +249,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         return vocab_file, merge_file
 
 
-class GPT2TokenizerFast(FastPreTrainedTokenizer):
+class GPT2TokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
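
Both concrete fast tokenizers now inherit from the renamed base, so an isinstance check against PreTrainedTokenizerFast covers them. A hedged usage sketch (assumes the module paths above and a stock checkpoint name; the fast classes may not yet be re-exported from the package root on this branch):

    # Hypothetical usage after this commit; imports go through the
    # module paths shown in this diff rather than the package root.
    from transformers.tokenization_bert import BertTokenizerFast
    from transformers.tokenization_utils import PreTrainedTokenizerFast

    tok = BertTokenizerFast.from_pretrained("bert-base-uncased")
    assert isinstance(tok, PreTrainedTokenizerFast)  # new base-class name
    print(tok.tokenize("Hello world"))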

transformers/tokenization_utils.py

@@ -1412,9 +1412,9 @@ class PreTrainedTokenizer(object):
         return out_string
 
 
-class FastPreTrainedTokenizer(PreTrainedTokenizer):
+class PreTrainedTokenizerFast(PreTrainedTokenizer):
     def __init__(self, **kwargs):
-        super(FastPreTrainedTokenizer, self).__init__(**kwargs)
+        super(PreTrainedTokenizerFast, self).__init__(**kwargs)
 
     @property
     def tokenizer(self):
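
Worth noting: the rename touches the __init__ body only because the two-argument super(FastPreTrainedTokenizer, self) form repeats the enclosing class name. With Python 3's zero-argument super() the rename would be a single-line change; a sketch of the equivalent form:

    # Equivalent Python 3 constructor: zero-argument super() does not
    # repeat the enclosing class name, so renames leave it untouched.
    class PreTrainedTokenizerFast(PreTrainedTokenizer):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)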