mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
using raw string for regex to search <extra_id> (#21162)
* Use a raw string for the regex that searches for <extra_id>
* Fix the same issue in the test file: `tokenization_t5.py`
This commit is contained in:
parent
8a17da2f7f
commit
8ad06b7c13
@ -214,7 +214,7 @@ class T5Tokenizer(PreTrainedTokenizer):
|
||||
|
||||
def get_sentinel_tokens(self):
|
||||
return list(
|
||||
set(filter(lambda x: bool(re.search("<extra_id_\d+>", x)) is not None, self.additional_special_tokens))
|
||||
set(filter(lambda x: bool(re.search(r"<extra_id_\d+>", x)) is not None, self.additional_special_tokens))
|
||||
)
|
||||
|
||||
def get_sentinel_token_ids(self):
|
||||
|
@ -386,7 +386,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
sentinel_tokens = tokenizer.get_sentinel_tokens()
|
||||
self.assertEquals(len(sentinel_tokens), 10)
|
||||
self.assertListEqual(sorted(sentinel_tokens), sorted([f"<extra_id_{str(i)}>" for i in range(0, 10)]))
|
||||
self.assertTrue([re.search("<extra_id_\d+>", token) is not None for token in sentinel_tokens])
|
||||
self.assertTrue([re.search(r"<extra_id_\d+>", token) is not None for token in sentinel_tokens])
|
||||
|
||||
def test_get_sentinel_token_ids(self):
|
||||
tokenizer = T5Tokenizer(SAMPLE_VOCAB, extra_ids=10)
|
||||
@ -397,7 +397,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
sentinel_tokens = tokenizer.get_sentinel_tokens()
|
||||
self.assertEquals(len(sentinel_tokens), 10)
|
||||
self.assertListEqual(sorted(sentinel_tokens), sorted([f"<extra_id_{str(i)}>" for i in range(0, 10)]))
|
||||
self.assertTrue([re.search("<extra_id_\d+>", token) is not None for token in sentinel_tokens])
|
||||
self.assertTrue([re.search(r"<extra_id_\d+>", token) is not None for token in sentinel_tokens])
|
||||
|
||||
def test_get_sentinel_token_ids_for_fasttokenizer(self):
|
||||
tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10)
|
||||
|
Loading…
Reference in New Issue
Block a user