DocumentQuestionAnsweringPipeline only for fast ⚡ tokenizers (#22745)

* fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-07-05 05:40:05 +06:00 · 2023-04-13 17:22:59 +02:00 · 2023-04-13 17:22:59 +02:00 · 32b08742a5
commit 32b08742a5
parent 4def2fe969
4 changed files with 18 additions and 22 deletions
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@ -131,6 +131,11 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"):
            raise ValueError(
                "`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
                f"(`{self.tokenizer.__class__.__name__}`) is provided."
            )
        if self.model.config.__class__.__name__ == "VisionEncoderDecoderConfig":
            self.model_type = ModelType.VisionEncoderDecoder
--- a/tests/models/layoutlm/test_modeling_layoutlm.py
+++ b/tests/models/layoutlm/test_modeling_layoutlm.py
@ -246,20 +246,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    )
    fx_compatible = True
    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Decide whether a pipeline test case must be skipped for this model.

        Only `pipeline_test_casse_name` and `tokenizer_name` are inspected; `config_class`,
        `model_architecture` and `processor_name` are accepted but not consulted here.

        Returns:
            `True` when the test case is `DocumentQuestionAnsweringPipelineTests` and a tokenizer name is
            given that does not end with `"Fast"`; `False` in every other situation.
        """
        if pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests" and tokenizer_name is not None:
            # This pipeline uses `sequence_ids()` which is only available for fast tokenizers, so any
            # tokenizer whose class name lacks the `Fast` suffix leads to a skip.
            return not tokenizer_name.endswith("Fast")
        return False
    def setUp(self):
        self.model_tester = LayoutLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=LayoutLMConfig, hidden_size=37)
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@ -295,15 +295,10 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
            # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
            # config. With new tiny model creation, it is available, but we need to fix the failed tests.
            return True
        elif (
            pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
            and tokenizer_name is not None
            and not tokenizer_name.endswith("Fast")
        ):
            # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
            return True
-        return False
+        return super().is_pipeline_test_to_skip(
            pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        )
    def setUp(self):
        self.model_tester = LayoutLMv2ModelTester(self)
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@ -428,9 +428,19 @@ class PipelineTesterMixin:
    def test_pipeline_zero_shot_object_detection(self):
        self.run_task_tests(task="zero-shot-object-detection")
    # Test cases listed here are skipped regardless of the model architecture involved.
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Report whether a pipeline test case should be skipped, independent of the model under test.

        The decision depends only on `pipeline_test_casse_name` and `tokenizer_name`; the remaining
        arguments (`config_class`, `model_architecture`, `processor_name`) are not read by this method.

        Returns:
            `True` if the test case is `DocumentQuestionAnsweringPipelineTests` and `tokenizer_name` is set
            but does not end with `"Fast"`, otherwise `False`.
        """
        # No fix is required for this case: `DocumentQuestionAnsweringPipelineTests` requires a fast
        # tokenizer, so skipping it for other tokenizers is the intended outcome.
        if pipeline_test_casse_name != "DocumentQuestionAnsweringPipelineTests":
            return False
        if tokenizer_name is None:
            return False
        has_fast_suffix = tokenizer_name.endswith("Fast")
        return not has_fast_suffix