Support single token decode for CodeGenTokenizer (#28628)

Convert `token_ids` with `to_py_obj` in `.decode()` so that a single token id can be decoded.
This commit is contained in:
cmathw 2024-01-23 15:27:24 +00:00 committed by GitHub
parent 5b5e71dc41
commit 9a4521dd9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Union
import numpy as np
import regex as re
from ...utils import is_tf_available, is_torch_available, logging
from ...utils import is_tf_available, is_torch_available, logging, to_py_obj
if TYPE_CHECKING:
@@ -352,6 +352,9 @@ class CodeGenTokenizer(PreTrainedTokenizer):
Returns:
`str`: The decoded sentence.
"""
token_ids = to_py_obj(token_ids)
decoded_text = super()._decode(
token_ids=token_ids,
skip_special_tokens=skip_special_tokens,