Use a different internal model in tests (#33387)

* use diff internal model in tests

* use diff internal model in tests
This commit is contained in:
Ita Zaporozhets 2024-09-11 05:27:00 -04:00 committed by GitHub
parent f38590dade
commit 781bbc4d98
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -846,7 +846,8 @@ class TikTokenIntegrationTests(unittest.TestCase):
"""
def test_tiktoken_llama(self):
model_path = "hf-internal-testing/Llama3-Instruct-Internal"
model_path = "hf-internal-testing/llama-3-8b-internal"
subfolder = "original"
test_text = "This is a test sentence."
test_tokens = [128000, 2028, 374, 264, 1296, 11914, 13, 128001]
num_reserved_special_tokens = 256
@ -866,6 +867,7 @@ class TikTokenIntegrationTests(unittest.TestCase):
tiktoken_tokenizer = PreTrainedTokenizerFast.from_pretrained(
model_path,
subfolder=subfolder,
additional_special_tokens=special_tokens,
bos_token="<|begin_of_text|>",
eos_token="<|end_of_text|>",
@ -874,7 +876,14 @@ class TikTokenIntegrationTests(unittest.TestCase):
self.assertEqual(tokens[0], "<|begin_of_text|>")
tiktoken_tokenizer = AutoTokenizer.from_pretrained(
model_path, legacy=False, additional_special_tokens=special_tokens, add_bos_token=True, add_eos_token=True
model_path,
subfolder=subfolder,
legacy=False,
additional_special_tokens=special_tokens,
bos_token="<|begin_of_text|>",
eos_token="<|end_of_text|>",
add_bos_token=True,
add_eos_token=True,
)
self.assertTrue(isinstance(tiktoken_tokenizer, PreTrainedTokenizerFast))
@ -892,7 +901,10 @@ class TikTokenIntegrationTests(unittest.TestCase):
tiktoken_tokenizer = AutoTokenizer.from_pretrained(
model_path,
subfolder=subfolder,
additional_special_tokens=special_tokens,
bos_token="<|begin_of_text|>",
eos_token="<|end_of_text|>",
from_slow=True,
add_bos_token=True,
add_eos_token=True,