From f6d5046af1051a7f89384f19e9fd92049f964d44 Mon Sep 17 00:00:00 2001
From: Funtowicz Morgan
Date: Tue, 2 Jun 2020 09:02:27 +0000
Subject: [PATCH] Override get_vocab for fast tokenizer. (#4717)

---
 src/transformers/tokenization_utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index 9e137b853d4..c3711e640d6 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -2368,6 +2368,9 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
     def _convert_id_to_token(self, index: int) -> Optional[str]:
         return self._tokenizer.id_to_token(int(index))
 
+    def get_vocab(self):
+        return self._tokenizer.get_vocab(True)
+
     def convert_tokens_to_string(self, tokens: List[int], skip_special_tokens: bool = False) -> str:
         return self._tokenizer.decode(tokens, skip_special_tokens)
 