mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-03 03:31:05 +06:00
LongformerTokenizerFast (#4547)
This commit is contained in:
parent
c9c385c522
commit
5139733623
@ -139,7 +139,7 @@ from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFas
|
||||
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||
from .tokenization_flaubert import FlaubertTokenizer
|
||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||
from .tokenization_longformer import LongformerTokenizer
|
||||
from .tokenization_longformer import LongformerTokenizer, LongformerTokenizerFast
|
||||
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
|
||||
from .tokenization_reformer import ReformerTokenizer
|
||||
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
import logging
|
||||
|
||||
from .tokenization_roberta import RobertaTokenizer
|
||||
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -40,3 +40,12 @@ class LongformerTokenizer(RobertaTokenizer):
|
||||
"vocab_file": {m: vocab_url for m in _all_longformer_models},
|
||||
"merges_file": {m: merges_url for m in _all_longformer_models},
|
||||
}
|
||||
|
||||
|
||||
class LongformerTokenizerFast(RobertaTokenizerFast):
    """Longformer counterpart of ``RobertaTokenizerFast``.

    Only the two class-level tables below are defined here; everything
    else is inherited unchanged from ``RobertaTokenizerFast``.
    """

    # Longformer reuses the RoBERTa vocabulary and merges files, so every
    # Longformer model name is mapped to the same pair of URLs.
    pretrained_vocab_files_map = {
        "vocab_file": dict.fromkeys(_all_longformer_models, vocab_url),
        "merges_file": dict.fromkeys(_all_longformer_models, merges_url),
    }
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
|
Loading…
Reference in New Issue
Block a user