Override get_vocab for fast tokenizer. (#4717)

This commit is contained in:
Funtowicz Morgan 2020-06-02 09:02:27 +00:00 committed by GitHub
parent 88762a2f8c
commit f6d5046af1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -2368,6 +2368,9 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
def _convert_id_to_token(self, index: int) -> Optional[str]:
    """Look up the token string for a vocabulary id.

    Delegates to ``self._tokenizer.id_to_token``.

    Args:
        index: The id to resolve. It is coerced with ``int()`` before being
            handed to the backend, so int-like values are accepted.

    Returns:
        Whatever the backend's ``id_to_token`` returns for that id
        (annotated as ``Optional[str]``).
    """
    # Resolve the backend method first, then coerce the id, mirroring the
    # order in which a single chained call would evaluate them.
    lookup = self._tokenizer.id_to_token
    token_id = int(index)
    return lookup(token_id)
def get_vocab(self):
    """Return the vocabulary reported by the backend tokenizer.

    Delegates to ``self._tokenizer.get_vocab`` and returns its result
    unchanged.
    """
    # NOTE(review): the positional True presumably asks the backend to
    # include added tokens in the result -- confirm against the backend API.
    backend_vocab = self._tokenizer.get_vocab(True)
    return backend_vocab
def convert_tokens_to_string(self, tokens: List[int], skip_special_tokens: bool = False) -> str:
    """Turn a sequence of tokens into a single string via the backend.

    Delegates to ``self._tokenizer.decode``.

    Args:
        tokens: The sequence to decode (annotated as a list of ints); passed
            to the backend untouched.
        skip_special_tokens: Forwarded positionally as the second argument
            to the backend's ``decode``. Defaults to ``False``.

    Returns:
        The value produced by the backend's ``decode`` call.
    """
    decode = self._tokenizer.decode
    return decode(tokens, skip_special_tokens)