Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 10:12:23 +06:00
Overwrite get_clean_sequence as this was causing a bottleneck (#13183)
This commit is contained in:
parent 143738214c
commit 588e6caa15
@@ -15,6 +15,7 @@
 import unittest
+from typing import Tuple
 
 from transformers import AddedToken, LukeTokenizer
 from transformers.testing_utils import require_torch, slow
@@ -81,6 +82,11 @@ class Luke(TokenizerTesterMixin, unittest.TestCase):
         assert encoded_sentence == encoded_text_from_decode
         assert encoded_pair == encoded_pair_from_decode
 
+    def get_clean_sequence(self, tokenizer, max_length=20) -> Tuple[str, list]:
+        txt = "Beyonce lives in Los Angeles"
+        ids = tokenizer.encode(txt, add_special_tokens=False)
+        return txt, ids
+
     def test_space_encoding(self):
         tokenizer = self.get_tokenizer()