From 3587769c08ffaf42c99f6882d4ad76d3a3669e5e Mon Sep 17 00:00:00 2001 From: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> Date: Fri, 1 Sep 2023 19:27:01 +0100 Subject: [PATCH] [VITS] Only trigger tokenizer warning for uroman (#25915) --- src/transformers/models/vits/tokenization_vits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/vits/tokenization_vits.py b/src/transformers/models/vits/tokenization_vits.py index eb973ec7d0a..f2cc6be3e43 100644 --- a/src/transformers/models/vits/tokenization_vits.py +++ b/src/transformers/models/vits/tokenization_vits.py @@ -184,7 +184,7 @@ class VitsTokenizer(PreTrainedTokenizer): filtered_text = self._preprocess_char(text) - if has_non_roman_characters(filtered_text): + if has_non_roman_characters(filtered_text) and self.is_uroman: logger.warning( "Text to the tokenizer contains non-Roman characters. Ensure the `uroman` Romanizer is " "applied to the text prior to passing it to the tokenizer. See "