Add chat_template for tokenizer extracted from GGUF model (#32908)

* add chat_template to gguf tokenizer

* add template through tokenizer config
This commit is contained in:
Isotr0py 2024-08-22 22:41:25 +08:00 committed by GitHub
parent 99d67f1a09
commit ee8c01f839
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -121,8 +121,10 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
gguf_param = load_gguf_checkpoint(kwargs.get("vocab_file"))
architecture = gguf_param["config"]["model_type"]
tokenizer_dict = gguf_param["tokenizer"]
tokenizer_config = gguf_param["tokenizer_config"]
fast_tokenizer, additional_kwargs = convert_gguf_tokenizer(architecture, tokenizer_dict)
kwargs.update(tokenizer_config)
if len(additional_kwargs) > 0:
kwargs.update(additional_kwargs)