[UdopTokenizer] Fix post merge imports (#29451)

* update * ... * nits * arf * 🧼 * beat the last guy * style everyone
2025-08-01 10:41:07 +06:00 · 2024-03-05 09:42:52 +01:00 · 2024-03-05 09:42:52 +01:00 · 132852203a
commit 132852203a
parent fa7f3cf336
3 changed files with 16 additions and 14 deletions
--- a/src/transformers/models/udop/tokenization_udop.py
+++ b/src/transformers/models/udop/tokenization_udop.py
@@ -157,12 +157,6 @@ PRETRAINED_VOCAB_FILES_MAP = {
 }
 # TODO(PVP) - this should be removed in Transformers v5
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
    "microsoft/udop-large": 512,
 }
 class UdopTokenizer(PreTrainedTokenizer):
    """
    Adapted from [`LayoutXLMTokenizer`] and [`T5Tokenizer`]. Based on
@@ -256,7 +250,6 @@ class UdopTokenizer(PreTrainedTokenizer):
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]
    def __init__(
--- a/src/transformers/models/udop/tokenization_udop_fast.py
+++ b/src/transformers/models/udop/tokenization_udop_fast.py
@@ -29,11 +29,6 @@ from ...tokenization_utils_base import (
 )
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ...utils import PaddingStrategy, TensorType, add_end_docstrings, is_sentencepiece_available, logging
 from ..udop.tokenization_udop import (
    PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES,
    PRETRAINED_VOCAB_FILES_MAP,
    VOCAB_FILES_NAMES,
 )
 if is_sentencepiece_available():
@@ -42,6 +37,17 @@ else:
    UdopTokenizer = None
 VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer.json"}
 PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {
        "microsoft/udop-large": "https://huggingface.co/microsoft/udop-large/resolve/main/spiece.model",
    },
    "tokenizer_file": {
        "microsoft/udop-large": "https://huggingface.co/microsoft/udop-large/resolve/main/tokenizer.json",
    },
 }
 logger = logging.get_logger(__name__)
 UDOP_ENCODE_KWARGS_DOCSTRING = r"""
@@ -197,7 +203,6 @@ class UdopTokenizerFast(PreTrainedTokenizerFast):
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]
    slow_tokenizer_class = UdopTokenizer
--- a/tests/models/udop/test_tokenization_udop.py
+++ b/tests/models/udop/test_tokenization_udop.py
@@ -22,12 +22,12 @@ from typing import List
 from transformers import (
    AddedToken,
    SpecialTokensMixin,
    UdopTokenizer,
    UdopTokenizerFast,
    is_tf_available,
    is_torch_available,
    logging,
 )
 from transformers.models.udop.tokenization_udop import UdopTokenizer
 from transformers.testing_utils import (
    get_tests_dir,
    is_pt_tf_cross_test,
@@ -1717,6 +1717,10 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_alignement_methods(self):
        pass
    @unittest.skip("#TODO will be removed in main")
    def test_pretrained_model_lists(self):
        pass
    @unittest.skip("UDOP tokenizer requires boxes besides sequences.")
    def test_maximum_encoding_length_pair_input(self):
        pass