feat: Add granite architectures to auto tokenizer name mappings (#38802)

Branch: GraniteTokenizerMapping

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
Gabe Goodhart 2025-06-19 08:20:42 -06:00 committed by GitHub
parent 54a02160eb
commit 9a02e7602d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -245,6 +245,10 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, tuple[Optional[str], Optional[str]]](
("gpt_neox_japanese", ("GPTNeoXJapaneseTokenizer", None)),
("gptj", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
("gptsan-japanese", ("GPTSanJapaneseTokenizer", None)),
("granite", ("GPT2Tokenizer", None)),
("granitemoe", ("GPT2Tokenizer", None)),
("granitemoehybrid", ("GPT2Tokenizer", None)),
("granitemoeshared", ("GPT2Tokenizer", None)),
("grounding-dino", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
("groupvit", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
("helium", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),