Support single token decode for CodeGenTokenizer (#28628)

Convert `token_ids` to a plain Python object with `to_py_obj` in `.decode()` so a single token id can be decoded.
2025-08-03 03:31:05 +06:00 · 2024-01-23 15:27:24 +00:00 · 2024-01-23 15:27:24 +00:00 · 9a4521dd9b
commit 9a4521dd9b
parent 5b5e71dc41
1 changed file with 4 additions and 1 deletion
--- a/src/transformers/models/codegen/tokenization_codegen.py
+++ b/src/transformers/models/codegen/tokenization_codegen.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 import numpy as np
 import regex as re

-from ...utils import is_tf_available, is_torch_available, logging
+from ...utils import is_tf_available, is_torch_available, logging, to_py_obj


 if TYPE_CHECKING:
@@ -352,6 +352,9 @@ class CodeGenTokenizer(PreTrainedTokenizer):
        Returns:
            `str`: The decoded sentence.
        """
+
+        token_ids = to_py_obj(token_ids)
+
        decoded_text = super()._decode(
            token_ids=token_ids,
            skip_special_tokens=skip_special_tokens,