diff --git a/tests/test_tokenization_luke.py b/tests/test_tokenization_luke.py
index 84bf52a0f3b..148e7de27bc 100644
--- a/tests/test_tokenization_luke.py
+++ b/tests/test_tokenization_luke.py
@@ -15,6 +15,7 @@
 
 import unittest
+from typing import Tuple
 
 from transformers import AddedToken, LukeTokenizer
 from transformers.testing_utils import require_torch, slow
@@ -81,6 +82,11 @@ class Luke(TokenizerTesterMixin, unittest.TestCase):
         assert encoded_sentence == encoded_text_from_decode
         assert encoded_pair == encoded_pair_from_decode
 
+    def get_clean_sequence(self, tokenizer, max_length=20) -> Tuple[str, list]:
+        txt = "Beyonce lives in Los Angeles"
+        ids = tokenizer.encode(txt, add_special_tokens=False)
+        return txt, ids
+
     def test_space_encoding(self):
         tokenizer = self.get_tokenizer()