From 74a3cebfa51b539bfcfa79b33686cc090b7074e8 Mon Sep 17 00:00:00 2001
From: Matt
Date: Mon, 27 Nov 2023 18:40:10 +0000
Subject: [PATCH] Update chat template warnings/guides (#27634)

* Update default ChatML template

* Update docs/warnings

* Update docs/source/en/chat_templating.md

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Slight rework

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
---
 docs/source/en/chat_templating.md           | 5 ++++-
 src/transformers/tokenization_utils_base.py | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/chat_templating.md b/docs/source/en/chat_templating.md
index 82bdf591ae5..a478c32e6ff 100644
--- a/docs/source/en/chat_templating.md
+++ b/docs/source/en/chat_templating.md
@@ -376,7 +376,10 @@ input formats. Our default template for models that don't have a class-specific
 ```
 
 If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes
-handy support for "generation prompts" - see the next section for more!
+handy support for [generation prompts](#what-are-generation-prompts), but note that it doesn't add BOS or EOS tokens!
+If your model expects those, they won't be added automatically by `apply_chat_template` - in other words, the
+text will be tokenized with `add_special_tokens=False`. This is to avoid potential conflicts between the template and
+the `add_special_tokens` logic. If your model expects special tokens, make sure to add them to the template!
 
 ```
 tokenizer.chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 723de720a5f..9d68cb3ae58 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1786,7 +1786,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
         """
         logger.warning_once(
             "\nNo chat template is defined for this tokenizer - using a default chat template "
-            "that implements the ChatML format. If the default is not appropriate for "
+            "that implements the ChatML format (without BOS/EOS tokens!). If the default is not appropriate for "
             "your model, please set `tokenizer.chat_template` to an appropriate template. "
             "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
         )
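
The sketch below (not part of the patch itself) illustrates the behaviour the new docs text describes: since `apply_chat_template` tokenizes with `add_special_tokens=False`, any BOS/EOS tokens the model expects have to come from the template. The checkpoint name is an arbitrary stand-in, and the template is the docs' ChatML one-liner extended with `bos_token` purely as an example of baking a special token into the template.

```python
# Minimal sketch, assuming an arbitrary stand-in checkpoint ("gpt2") and an
# illustrative template. Not the patch's code - just a demo of the documented
# behaviour: apply_chat_template encodes with add_special_tokens=False, so
# special tokens must be written into the template itself.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model

# bos_token is exposed to the template from the tokenizer's special tokens,
# so the template can add it explicitly at the start of the rendered text.
tokenizer.chat_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
    "{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

chat = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi there."},
]

# tokenize=False returns the rendered string; with tokenize=True the result is
# encoded with add_special_tokens=False, so no extra BOS/EOS get appended.
print(tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True))
```

With `tokenize=False` you can eyeball the rendered string and confirm the BOS token appears exactly once, coming from the template rather than from the tokenizer's `add_special_tokens` logic.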