Explicitly setting encoding in tokenization_utils_base.py (#38553)

Update tokenization_utils_base.py

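Explicitly pass `encoding="utf-8"` when opening the chat template file, instead of relying on the platform's default text encoding.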
This commit is contained in:
Muqi Li 2025-06-03 20:08:35 +08:00 committed by GitHub
parent caf708da1b
commit 8cb96787a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2086,7 +2086,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
chat_template_file = resolved_vocab_files.pop("chat_template_file", None)
extra_chat_templates = [key for key in resolved_vocab_files if key.startswith("chat_template_")]
if chat_template_file is not None:
-with open(chat_template_file) as chat_template_handle:
+with open(chat_template_file, encoding="utf-8") as chat_template_handle:
chat_templates["default"] = chat_template_handle.read()
for extra_chat_template in extra_chat_templates:
template_file = resolved_vocab_files.pop(extra_chat_template, None)