From 0c98f24889f4dd7ca9f35f16186b59a66add2654 Mon Sep 17 00:00:00 2001
From: kallewoof <karljohan-alm@garage.co.jp>
Date: Mon, 23 Jun 2025 23:32:16 +0900
Subject: [PATCH] fix: add __bool__ operator to tokenizer to avoid bloated
 asserts (#38899)

* fix: add __bool__ operator to tokenizer to avoid bloated asserts

When a user writes 'assert tokenizer' to check that the tokenizer is not None, Python evaluates the tokenizer's truth value and, because no '__bool__()' is defined, falls back to the rather expensive '__len__()' method. This fix adds a trivial '__bool__()' that always returns True, so that the assertion fails for a None tokenizer and passes for an actual tokenizer without ever computing its length.

* typo
---
 src/transformers/tokenization_utils_fast.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py
index 9249fe5435b..3fecfa0e1dd 100644
--- a/src/transformers/tokenization_utils_fast.py
+++ b/src/transformers/tokenization_utils_fast.py
@@ -278,6 +278,12 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         """
         return {k.content: v for v, k in sorted(self.added_tokens_decoder.items(), key=lambda item: item[0])}
 
+    def __bool__(self) -> bool:
+        """
+        Always returns True so that truth tests such as `assert tokenizer` do not fall back to the expensive `__len__`.
+        """
+        return True
+
     def __len__(self) -> int:
         """
         Size of the full vocabulary with the added tokens.