From 8cb96787a6bd87f43ff651b8ac40974c1fe75a7c Mon Sep 17 00:00:00 2001 From: Muqi Li Date: Tue, 3 Jun 2025 20:08:35 +0800 Subject: [PATCH] Explicitly setting encoding in tokenization_utils_base.py (#38553) Update tokenization_utils_base.py Add encoding explicitly --- src/transformers/tokenization_utils_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index b6ed3c677b6..1a0b70e10ad 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2086,7 +2086,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): chat_template_file = resolved_vocab_files.pop("chat_template_file", None) extra_chat_templates = [key for key in resolved_vocab_files if key.startswith("chat_template_")] if chat_template_file is not None: - with open(chat_template_file) as chat_template_handle: + with open(chat_template_file, encoding="utf-8") as chat_template_handle: chat_templates["default"] = chat_template_handle.read() for extra_chat_template in extra_chat_templates: template_file = resolved_vocab_files.pop(extra_chat_template, None)