add build_inputs_with_special_tokens to LlamaFast (#26297)

* add build_inputs_with_special_tokens to LlamaFast

* fixup

* Update src/transformers/models/llama/tokenization_llama_fast.py
This commit is contained in:
Arthur 2023-10-02 18:30:44 +02:00 committed by GitHub
parent bab3331906
commit c20d90d577
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -249,3 +249,16 @@ class LlamaTokenizerFast(PreTrainedTokenizerFast):
template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message) template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)
return template return template
# TODO ArthurZ let's rely on the template processor instead, refactor all fast tokenizers
# Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.build_inputs_with_special_tokens
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
    """Wrap one or two token-id sequences with the configured special tokens.

    Each sequence is prefixed with ``self.bos_token_id`` when
    ``self.add_bos_token`` is truthy and suffixed with ``self.eos_token_id``
    when ``self.add_eos_token`` is truthy. When a second sequence is given,
    it is wrapped the same way and appended after the first.

    Args:
        token_ids_0: Token ids of the first sequence (concatenated with
            lists, so a list is expected).
        token_ids_1: Optional token ids of the second sequence.

    Returns:
        A new list holding the wrapped sequence(s).
    """
    prefix = []
    if self.add_bos_token:
        prefix = [self.bos_token_id]

    suffix = []
    if self.add_eos_token:
        suffix = [self.eos_token_id]

    # Single-sequence case: nothing more to append.
    if token_ids_1 is None:
        return prefix + token_ids_0 + suffix

    # Pair case: the second sequence gets its own BOS/EOS wrapping.
    return prefix + token_ids_0 + suffix + prefix + token_ids_1 + suffix