mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-23 14:29:01 +06:00
Fix Llama 3 TikToken conversion (#33538)
* Fix Llama 3 TikToken conversion
* No need to add tokens again
This commit is contained in:
parent
4d8908df27
commit
0c718f16d1
@@ -332,7 +332,7 @@ def write_model(
|
|||||||
|
|
||||||
class Llama3Converter(TikTokenConverter):
|
class Llama3Converter(TikTokenConverter):
|
||||||
def __init__(self, vocab_file, special_tokens=None, instruct=False, model_max_length=None, **kwargs):
|
def __init__(self, vocab_file, special_tokens=None, instruct=False, model_max_length=None, **kwargs):
|
||||||
super().__init__(vocab_file, **kwargs)
|
super().__init__(vocab_file, additional_special_tokens=special_tokens, **kwargs)
|
||||||
tokenizer = self.converted()
|
tokenizer = self.converted()
|
||||||
chat_template = (
|
chat_template = (
|
||||||
"{% set loop_messages = messages %}"
|
"{% set loop_messages = messages %}"
|
||||||
@@ -345,7 +345,6 @@ class Llama3Converter(TikTokenConverter):
|
|||||||
"{% endfor %}"
|
"{% endfor %}"
|
||||||
"{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
|
"{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
|
||||||
)
|
)
|
||||||
tokenizer.add_special_tokens(special_tokens)
|
|
||||||
|
|
||||||
self.tokenizer = PreTrainedTokenizerFast(
|
self.tokenizer = PreTrainedTokenizerFast(
|
||||||
tokenizer_object=tokenizer,
|
tokenizer_object=tokenizer,
|
||||||
|
Loading…
Reference in New Issue
Block a user