Enable some Jinja extensions and add datetime capabilities (#32684)

* Add new Jinja features: - Do extension - Break/continue in loops - Call strftime to get current datetime in any format * Add new Jinja features: - Do extension - Break/continue in loops - Call strftime to get current datetime in any format * Fix strftime template * Add template strip() just to be safe * Remove the do extension to make porting easier, and also because it's the least useful * Rename test * strftime -> strftime_now * Split test * Update test to use strftime_now * Refactor everything out into chat_template_utils * Refactor everything out into chat_template_utils * Refactor everything out into chat_template_utils * Refactor everything out into chat_template_utils * Refactor everything out into chat_template_utils
2025-07-31 02:02:21 +06:00 · 2024-08-23 14:26:12 +01:00 · 2024-08-23 14:26:12 +01:00 · 371b9c1486
commit 371b9c1486
parent adb91179b9
3 changed files with 151 additions and 92 deletions
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@ -27,7 +27,6 @@ from collections import UserDict
 from collections.abc import Mapping, Sized
 from contextlib import contextmanager
 from dataclasses import dataclass
-from functools import lru_cache
 from inspect import isfunction
 from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union

@ -65,6 +64,7 @@ from .utils import (
    requires_backends,
    to_py_obj,
 )
+from .utils.chat_template_utils import _compile_jinja_template, _render_with_assistant_indices


 if TYPE_CHECKING:
@ -1791,7 +1791,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
            )

        # Compilation function uses a cache to avoid recompiling the same template
-        compiled_template = self._compile_jinja_template(chat_template)
+        compiled_template = _compile_jinja_template(chat_template)

        if isinstance(conversation, (list, tuple)) and (
            isinstance(conversation[0], (list, tuple)) or hasattr(conversation[0], "messages")
@ -1831,7 +1831,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
                # Indicates it's a Conversation object
                chat = chat.messages
            if return_assistant_tokens_mask:
-                rendered_chat, generation_indices = self._render_with_assistant_indices(
+                rendered_chat, generation_indices = _render_with_assistant_indices(
                    compiled_template=compiled_template,
                    messages=chat,
                    tools=tool_schemas,
@ -1888,94 +1888,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        else:
            return rendered

-    def _render_with_assistant_indices(
-        self, compiled_template, messages, tools, documents, add_generation_prompt, **template_kwargs
-    ):
-        rendered_blocks = []
-        generation_indices = []
-        with compiled_template.environment.activate_tracker(rendered_blocks, generation_indices):
-            for block in compiled_template.generate(
-                messages=messages,
-                tools=tools,
-                documents=documents,
-                add_generation_prompt=add_generation_prompt,
-                **template_kwargs,
-            ):
-                rendered_blocks.append(block)
-            rendered_chat = "".join(rendered_blocks)
-        return rendered_chat, generation_indices
-
-    @lru_cache
-    def _compile_jinja_template(self, chat_template):
-        try:
-            import jinja2
-            from jinja2 import nodes
-            from jinja2.exceptions import TemplateError
-            from jinja2.ext import Extension
-            from jinja2.sandbox import ImmutableSandboxedEnvironment
-        except ImportError:
-            raise ImportError("apply_chat_template requires jinja2 to be installed.")
-
-        if version.parse(jinja2.__version__) < version.parse("3.1.0"):
-            raise ImportError(
-                "apply_chat_template requires jinja2>=3.1.0 to be installed. Your version is " f"{jinja2.__version__}."
-            )
-
-        def raise_exception(message):
-            raise TemplateError(message)
-
-        def tojson(x, ensure_ascii=False, indent=None, separators=None, sort_keys=False):
-            # We override the built-in tojson filter because Jinja's default filter escapes HTML characters
-            # We also expose some options like custom indents and separators
-            return json.dumps(x, ensure_ascii=ensure_ascii, indent=indent, separators=separators, sort_keys=sort_keys)
-
-        class AssistantTracker(Extension):
-            # This extension is used to track the indices of assistant-generated tokens in the rendered chat
-            tags = {"generation"}
-
-            def __init__(self, environment: ImmutableSandboxedEnvironment):
-                # The class is only initiated by jinja.
-                super().__init__(environment)
-                environment.extend(activate_tracker=self.activate_tracker)
-                self._rendered_blocks = None
-                self._generation_indices = None
-
-            def parse(self, parser: jinja2.parser.Parser) -> jinja2.nodes.CallBlock:
-                lineno = next(parser.stream).lineno
-                body = parser.parse_statements(["name:endgeneration"], drop_needle=True)
-                return nodes.CallBlock(self.call_method("_generation_support"), [], [], body).set_lineno(lineno)
-
-            @jinja2.pass_eval_context
-            def _generation_support(self, context: jinja2.nodes.EvalContext, caller: jinja2.runtime.Macro) -> str:
-                rv = caller()
-                if self.is_active():
-                    # Only track generation indices if the tracker is active
-                    start_index = len("".join(self._rendered_blocks))
-                    end_index = start_index + len(rv)
-                    self._generation_indices.append((start_index, end_index))
-                return rv
-
-            def is_active(self) -> bool:
-                return self._rendered_blocks or self._generation_indices
-
-            @contextmanager
-            def activate_tracker(self, rendered_blocks: List[int], generation_indices: List[int]):
-                try:
-                    if self.is_active():
-                        raise ValueError("AssistantTracker should not be reused before closed")
-                    self._rendered_blocks = rendered_blocks
-                    self._generation_indices = generation_indices
-
-                    yield
-                finally:
-                    self._rendered_blocks = None
-                    self._generation_indices = None
-
-        jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True, extensions=[AssistantTracker])
-        jinja_env.filters["tojson"] = tojson
-        jinja_env.globals["raise_exception"] = raise_exception
-        return jinja_env.from_string(chat_template)
-
    def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[List[Dict]] = None) -> str:
        """
        Retrieve the chat template string used for tokenizing chat messages. This template is used
--- a/src/transformers/utils/chat_template_utils.py
+++ b/src/transformers/utils/chat_template_utils.py
@ -15,7 +15,22 @@
 import inspect
 import json
 import re
-from typing import Any, Callable, Dict, Optional, Tuple, Union, get_args, get_origin, get_type_hints
+from contextlib import contextmanager
+from datetime import datetime
+from functools import lru_cache
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, get_args, get_origin, get_type_hints
+
+from packaging import version
+
+from .import_utils import is_jinja_available
+
+
+if is_jinja_available():
+    import jinja2
+    from jinja2.ext import Extension
+    from jinja2.sandbox import ImmutableSandboxedEnvironment
+else:
+    jinja2 = None


 BASIC_TYPES = (int, float, str, bool, Any, type(None), ...)
@ -314,3 +329,90 @@ def get_json_schema(func: Callable) -> Dict:
    if return_dict is not None:
        output["return"] = return_dict
    return {"type": "function", "function": output}
+
+
+def _render_with_assistant_indices(
+    compiled_template, messages, tools, documents, add_generation_prompt, **template_kwargs
+):
+    rendered_blocks = []
+    generation_indices = []
+    with compiled_template.environment.activate_tracker(rendered_blocks, generation_indices):
+        for block in compiled_template.generate(
+            messages=messages,
+            tools=tools,
+            documents=documents,
+            add_generation_prompt=add_generation_prompt,
+            **template_kwargs,
+        ):
+            rendered_blocks.append(block)
+        rendered_chat = "".join(rendered_blocks)
+    return rendered_chat, generation_indices
+
+
+@lru_cache
+def _compile_jinja_template(chat_template):
+    class AssistantTracker(Extension):
+        # This extension is used to track the indices of assistant-generated tokens in the rendered chat
+        tags = {"generation"}
+
+        def __init__(self, environment: ImmutableSandboxedEnvironment):
+            # The class is only initiated by jinja.
+            super().__init__(environment)
+            environment.extend(activate_tracker=self.activate_tracker)
+            self._rendered_blocks = None
+            self._generation_indices = None
+
+        def parse(self, parser: jinja2.parser.Parser) -> jinja2.nodes.CallBlock:
+            lineno = next(parser.stream).lineno
+            body = parser.parse_statements(["name:endgeneration"], drop_needle=True)
+            return jinja2.nodes.CallBlock(self.call_method("_generation_support"), [], [], body).set_lineno(lineno)
+
+        @jinja2.pass_eval_context
+        def _generation_support(self, context: jinja2.nodes.EvalContext, caller: jinja2.runtime.Macro) -> str:
+            rv = caller()
+            if self.is_active():
+                # Only track generation indices if the tracker is active
+                start_index = len("".join(self._rendered_blocks))
+                end_index = start_index + len(rv)
+                self._generation_indices.append((start_index, end_index))
+            return rv
+
+        def is_active(self) -> bool:
+            return self._rendered_blocks or self._generation_indices
+
+        @contextmanager
+        def activate_tracker(self, rendered_blocks: List[int], generation_indices: List[int]):
+            try:
+                if self.is_active():
+                    raise ValueError("AssistantTracker should not be reused before closed")
+                self._rendered_blocks = rendered_blocks
+                self._generation_indices = generation_indices
+
+                yield
+            finally:
+                self._rendered_blocks = None
+                self._generation_indices = None
+
+    if version.parse(jinja2.__version__) < version.parse("3.1.0"):
+        raise ImportError(
+            "apply_chat_template requires jinja2>=3.1.0 to be installed. Your version is " f"{jinja2.__version__}."
+        )
+
+    def raise_exception(message):
+        raise jinja2.exceptions.TemplateError(message)
+
+    def tojson(x, ensure_ascii=False, indent=None, separators=None, sort_keys=False):
+        # We override the built-in tojson filter because Jinja's default filter escapes HTML characters
+        # We also expose some options like custom indents and separators
+        return json.dumps(x, ensure_ascii=ensure_ascii, indent=indent, separators=separators, sort_keys=sort_keys)
+
+    def strftime_now(format):
+        return datetime.now().strftime(format)
+
+    jinja_env = ImmutableSandboxedEnvironment(
+        trim_blocks=True, lstrip_blocks=True, extensions=[AssistantTracker, jinja2.ext.loopcontrols]
+    )
+    jinja_env.filters["tojson"] = tojson
+    jinja_env.globals["raise_exception"] = raise_exception
+    jinja_env.globals["strftime_now"] = strftime_now
+    return jinja_env.from_string(chat_template)
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@ -1153,6 +1153,51 @@ class TokenizerTesterMixin:
                    dummy_conversations, chat_template=dummy_template, tokenize=True
                )  # Check that no error raised

+    @require_jinja
+    def test_jinja_loopcontrols(self):
+        break_template = """
+        {%- for message in messages %}
+            {{- message.role + " " + message.content }}
+            {%- if loop.first %}
+                {%- break %}
+            {%- endif %}
+        {%- endfor %}""".strip()
+
+        dummy_conversation = [
+            {"role": "system", "content": "1"},
+            {"role": "user", "content": "2"},
+            {"role": "assistant", "content": "3"},
+        ]
+
+        tokenizers = self.get_tokenizers()
+        for tokenizer in tokenizers:
+            with self.subTest(f"{tokenizer.__class__.__name__}"):
+                break_output = tokenizer.apply_chat_template(
+                    dummy_conversation, chat_template=break_template, tokenize=False
+                )
+                self.assertEqual(break_output, "system 1")  # Loop should break after first iter
+
+    @require_jinja
+    def test_jinja_strftime(self):
+        strftime_template = """{{- strftime_now("%Y-%m-%d") }}""".strip()
+
+        dummy_conversation = [
+            {"role": "system", "content": "1"},
+            {"role": "user", "content": "2"},
+            {"role": "assistant", "content": "3"},
+        ]
+
+        tokenizers = self.get_tokenizers()
+        for tokenizer in tokenizers:
+            with self.subTest(f"{tokenizer.__class__.__name__}"):
+                strftime_output = tokenizer.apply_chat_template(
+                    dummy_conversation, chat_template=strftime_template, tokenize=False
+                )
+
+                # Assert that we get a date formatted as expected
+                self.assertEqual(len(strftime_output), 10)
+                self.assertEqual(len(strftime_output.split("-")), 3)
+
    @require_jinja
    def test_chat_template_return_assistant_tokens_mask(self):
        dummy_template = (