Mirror of https://github.com/huggingface/transformers.git
commit 7ead04ce14
parent 1f82a5d910

    FastPreTrainedTokenizer => PreTrainedTokenizerFast
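This commit renames the base class for fast tokenizers from FastPreTrainedTokenizer to PreTrainedTokenizerFast, so that its name lines up with the concrete subclasses BertTokenizerFast and GPT2TokenizerFast ("Fast" as a suffix throughout). The hunks below touch the class definition in tokenization_utils plus the import sites and subclass declarations in the BERT and GPT-2 tokenizer modules. A minimal sketch of the resulting hierarchy, assuming only what the hunks show (the absolute import paths are inferred from the relative imports in the diff, not stated by the commit itself):

    # Post-commit class hierarchy, as established by the hunks below.
    from transformers.tokenization_utils import PreTrainedTokenizer, PreTrainedTokenizerFast
    from transformers.tokenization_bert import BertTokenizerFast
    from transformers.tokenization_gpt2 import GPT2TokenizerFast

    # The fast base class extends the common PreTrainedTokenizer base...
    assert issubclass(PreTrainedTokenizerFast, PreTrainedTokenizer)
    # ...and each fast tokenizer extends the fast base class in turn.
    assert issubclass(BertTokenizerFast, PreTrainedTokenizerFast)
    assert issubclass(GPT2TokenizerFast, PreTrainedTokenizerFast)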
tokenization_bert.py
@@ -22,7 +22,7 @@ import unicodedata

 import tokenizers as tk

-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer


 logger = logging.getLogger(__name__)
@@ -529,7 +529,7 @@ def _is_punctuation(char):
     return False


-class BertTokenizerFast(FastPreTrainedTokenizer):
+class BertTokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
tokenization_gpt2.py
@@ -23,7 +23,7 @@ from functools import lru_cache
 import regex as re
 import tokenizers as tk

-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer


 logger = logging.getLogger(__name__)
@@ -249,7 +249,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         return vocab_file, merge_file


-class GPT2TokenizerFast(FastPreTrainedTokenizer):
+class GPT2TokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
tokenization_utils.py
@@ -1412,9 +1412,9 @@ class PreTrainedTokenizer(object):
         return out_string


-class FastPreTrainedTokenizer(PreTrainedTokenizer):
+class PreTrainedTokenizerFast(PreTrainedTokenizer):
     def __init__(self, **kwargs):
-        super(FastPreTrainedTokenizer, self).__init__(**kwargs)
+        super(PreTrainedTokenizerFast, self).__init__(**kwargs)

     @property
     def tokenizer(self):
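For downstream code the rename is mechanical: only the spelling of the base-class name changes. A hedged before/after sketch; CustomTokenizerFast is a hypothetical subclass for illustration, not part of the commit:

    from transformers.tokenization_utils import PreTrainedTokenizerFast

    # Before this commit the base class was spelled FastPreTrainedTokenizer;
    # nothing else about a subclass needs to change.
    class CustomTokenizerFast(PreTrainedTokenizerFast):
        def __init__(self, **kwargs):
            super(CustomTokenizerFast, self).__init__(**kwargs)

The diff keeps the explicit two-argument super() form; in Python 3 a bare super().__init__(**kwargs) is equivalent.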