Fix format mistake in string repr of tokenizer objects (#34493)

* fix repr string format for tokenizer objects The repr of tokenizer tokens looks confusing and just stupid, like this: `Tokenizer(...), added_tokens_decoder={1: ..., 2: ...}`. The dict that is the value of the added_tokens_decoder attribute is outside of the parentheses of the tokenizer object, whereas all other attributes are inside the parentheses like they should be. This commit fixes this bug. * cos: add newline before closing parenthesis of repr string
2025-07-30 17:52:35 +06:00 · 2024-10-30 10:03:41 +01:00 · 2024-10-30 10:03:41 +01:00 · 25a9fc584a
commit 25a9fc584a
parent cd277618d4
1 changed files with 2 additions and 2 deletions
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@ -1687,8 +1687,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
            f"{self.__class__.__name__}(name_or_path='{self.name_or_path}',"
            f" vocab_size={self.vocab_size}, model_max_length={self.model_max_length}, is_fast={self.is_fast},"
            f" padding_side='{self.padding_side}', truncation_side='{self.truncation_side}',"
-            f" special_tokens={self.special_tokens_map}, clean_up_tokenization_spaces={self.clean_up_tokenization_spaces}), "
-            " added_tokens_decoder={\n\t" + added_tokens_decoder_rep + "\n}"
+            f" special_tokens={self.special_tokens_map}, clean_up_tokenization_spaces={self.clean_up_tokenization_spaces},"
+            " added_tokens_decoder={\n\t" + added_tokens_decoder_rep + "\n}\n)"
        )

    def __len__(self) -> int: