From 3723f30a1867d9ed26a9a10c3fe494c00cf56a0c Mon Sep 17 00:00:00 2001
From: Sam Shleifer
Date: Fri, 5 Jun 2020 14:57:24 -0400
Subject: [PATCH] [cleanup] MarianTokenizer: delete unused constants (#4802)

---
 src/transformers/tokenization_marian.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/transformers/tokenization_marian.py b/src/transformers/tokenization_marian.py
index 4203fec09d7..a13fed09d08 100644
--- a/src/transformers/tokenization_marian.py
+++ b/src/transformers/tokenization_marian.py
@@ -7,7 +7,6 @@ from typing import Dict, List, Optional, Tuple, Union
 
 import sentencepiece
 
-from .file_utils import S3_BUCKET_PREFIX
 from .tokenization_utils import BatchEncoding, PreTrainedTokenizer
 
 
@@ -17,11 +16,6 @@ vocab_files_names = {
     "vocab": "vocab.json",
     "tokenizer_config_file": "tokenizer_config.json",
 }
-MODEL_NAMES = ("opus-mt-en-de",) # TODO(SS): delete this, the only required constant is vocab_files_names
-PRETRAINED_VOCAB_FILES_MAP = {
-    k: {m: f"{S3_BUCKET_PREFIX}/Helsinki-NLP/{m}/{fname}" for m in MODEL_NAMES}
-    for k, fname in vocab_files_names.items()
-}
 # Example URL https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/vocab.json
 
 
@@ -41,8 +35,6 @@ class MarianTokenizer(PreTrainedTokenizer):
     """
 
     vocab_files_names = vocab_files_names
-    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-    max_model_input_sizes = {m: 512 for m in MODEL_NAMES}
     model_input_names = ["attention_mask"] # actually attention_mask, decoder_attention_mask
     language_code_re = re.compile(">>.+<<") # type: re.Pattern
 