Mirror of https://github.com/huggingface/transformers.git
fix: add __bool__ operator to tokenizer to avoid bloated asserts (#38899)
* fix: add __bool__ operator to tokenizer to avoid bloated asserts

When a user does 'assert tokenizer' to ensure that the tokenizer is not None, they inadvertently trigger a rather expensive computation in the '__len__()' operator. This fix adds a trivial '__bool__()' that returns True, so a None tokenizer still fails the assert, while an actual tokenizer evaluates to True when asserted without calling the length operator.

* typo
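A minimal, stand-alone sketch of why this matters (the class names below are made up for illustration, this is not the transformers code itself): when a class defines __len__ but not __bool__, Python's truth testing falls back to __len__, so 'assert tokenizer' ends up computing the full vocabulary size just to check for None. Defining a trivial __bool__ short-circuits that fallback.

class SlowLenTokenizer:
    def __len__(self) -> int:
        # Stand-in for the real vocab-size computation, which is costly.
        print("computing full vocab size...")
        return 30000


class BoolTokenizer(SlowLenTokenizer):
    def __bool__(self) -> bool:
        # Truth testing no longer falls back to __len__.
        return True


assert SlowLenTokenizer()  # triggers __len__ (prints the message)
assert BoolTokenizer()     # returns immediately, __len__ never runs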
This commit is contained in:
parent d29482cc91
commit 0c98f24889
@@ -278,6 +278,12 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         """
         return {k.content: v for v, k in sorted(self.added_tokens_decoder.items(), key=lambda item: item[0])}
 
+    def __bool__(self) -> bool:
+        """
+        Returns True, to avoid expensive `assert tokenizer` gotchas.
+        """
+        return True
+
     def __len__(self) -> int:
         """
         Size of the full vocabulary with the added tokens.