Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-03 12:50:06 +06:00)
Fix Llama 3 TikToken conversion (#33538)
* Fix Llama 3 TikToken conversion
* No need to add tokens again
This commit is contained in:
parent 4d8908df27
commit 0c718f16d1
@@ -332,7 +332,7 @@ def write_model(
 
 class Llama3Converter(TikTokenConverter):
     def __init__(self, vocab_file, special_tokens=None, instruct=False, model_max_length=None, **kwargs):
-        super().__init__(vocab_file, **kwargs)
+        super().__init__(vocab_file, additional_special_tokens=special_tokens, **kwargs)
         tokenizer = self.converted()
         chat_template = (
             "{% set loop_messages = messages %}"
@@ -345,7 +345,6 @@ class Llama3Converter(TikTokenConverter):
             "{% endfor %}"
             "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
         )
-        tokenizer.add_special_tokens(special_tokens)
 
         self.tokenizer = PreTrainedTokenizerFast(
             tokenizer_object=tokenizer,
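For context, a minimal sketch of what Llama3Converter.__init__ looks like after these two hunks is given below. This is a reconstruction, not the full file: the chat_template body, the remaining PreTrainedTokenizerFast arguments, and the import paths are abridged or assumed rather than taken from this diff. The point of the fix is that the special tokens are handed to TikTokenConverter via additional_special_tokens, so the converted tokenizer already contains them and the later tokenizer.add_special_tokens(special_tokens) call becomes redundant ("No need to add tokens again").

    # Sketch of the converter after this commit; anything not visible in the
    # diff (chat_template body, extra PreTrainedTokenizerFast kwargs) is
    # assumed or abridged.
    from transformers import PreTrainedTokenizerFast
    from transformers.convert_slow_tokenizer import TikTokenConverter


    class Llama3Converter(TikTokenConverter):
        def __init__(self, vocab_file, special_tokens=None, instruct=False, model_max_length=None, **kwargs):
            # Pass the special tokens into the TikToken conversion itself, so the
            # converted tokenizer already knows about them ...
            super().__init__(vocab_file, additional_special_tokens=special_tokens, **kwargs)
            tokenizer = self.converted()

            # ... which makes a second tokenizer.add_special_tokens(special_tokens)
            # call redundant; the old call added the tokens a second time.

            self.tokenizer = PreTrainedTokenizerFast(
                tokenizer_object=tokenizer,
                model_max_length=model_max_length,  # assumed; not shown in this hunk
            )

In use, the conversion script would instantiate this converter with the tokenizer.model path and the list of Llama 3 special-token strings, then read the finished fast tokenizer from converter.tokenizer; that calling pattern is inferred from the signature above, not shown in this diff.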