mirror of https://github.com/huggingface/transformers.git
BartTokenizerFast (#4878)

commit 9208f57b16 (parent 403d309857)
src/transformers/__init__.py

@@ -118,7 +118,7 @@ from .pipelines import (
 # Tokenizers
 from .tokenization_albert import AlbertTokenizer
 from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
-from .tokenization_bart import BartTokenizer, MBartTokenizer
+from .tokenization_bart import BartTokenizer, BartTokenizerFast, MBartTokenizer
 from .tokenization_bert import BasicTokenizer, BertTokenizer, BertTokenizerFast, WordpieceTokenizer
 from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer
 from .tokenization_camembert import CamembertTokenizer
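With this export in place, the fast tokenizer becomes importable from the top-level package alongside the existing slow one. A minimal sketch, assuming an installed transformers build that includes this commit:

# Both classes are now exported from the package root.
from transformers import BartTokenizer, BartTokenizerFast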
src/transformers/tokenization_bart.py

@@ -16,7 +16,7 @@
 import logging
 from typing import List, Optional
 
-from .tokenization_roberta import RobertaTokenizer
+from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
 from .tokenization_utils import BatchEncoding
 from .tokenization_xlm_roberta import XLMRobertaTokenizer
 
@@ -44,6 +44,15 @@ class BartTokenizer(RobertaTokenizer):
     }
 
 
+class BartTokenizerFast(RobertaTokenizerFast):
+    # merges and vocab same as Roberta
+    max_model_input_sizes = {m: 1024 for m in _all_bart_models}
+    pretrained_vocab_files_map = {
+        "vocab_file": {m: vocab_url for m in _all_bart_models},
+        "merges_file": {m: merges_url for m in _all_bart_models},
+    }
+
+
 _all_mbart_models = ["facebook/mbart-large-en-ro"]
 SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model"
 
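A minimal usage sketch for the new class, not part of the diff. The checkpoint name "facebook/bart-large" is an assumption (any identifier in _all_bart_models is mapped to the same RoBERTa-style vocab/merges files above), and the calls shown are the standard PreTrainedTokenizer API:

from transformers import BartTokenizerFast

# from_pretrained() fetches the vocab.json / merges.txt files listed in
# pretrained_vocab_files_map and builds the Rust-backed fast tokenizer.
# "facebook/bart-large" is an assumed checkpoint name for illustration.
tokenizer = BartTokenizerFast.from_pretrained("facebook/bart-large")

ids = tokenizer.encode("Hello world!")  # token ids, with <s>/</s> added by default
text = tokenizer.decode(ids)            # decodes back to text, special tokens included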