diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index 78f49a5e2da..936d728b598 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -131,6 +131,11 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"): + raise ValueError( + "`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer " + f"(`{self.tokenizer.__class__.__name__}`) is provided." + ) if self.model.config.__class__.__name__ == "VisionEncoderDecoderConfig": self.model_type = ModelType.VisionEncoderDecoder diff --git a/tests/models/layoutlm/test_modeling_layoutlm.py b/tests/models/layoutlm/test_modeling_layoutlm.py index d2aad061c38..0535fbf4e1f 100644 --- a/tests/models/layoutlm/test_modeling_layoutlm.py +++ b/tests/models/layoutlm/test_modeling_layoutlm.py @@ -246,20 +246,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase ) fx_compatible = True - # TODO: Fix the failed tests - def is_pipeline_test_to_skip( - self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name - ): - if ( - pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests" - and tokenizer_name is not None - and not tokenizer_name.endswith("Fast") - ): - # This pipeline uses `sequence_ids()` which is only available for fast tokenizers. - return True - - return False - def setUp(self): self.model_tester = LayoutLMModelTester(self) self.config_tester = ConfigTester(self, config_class=LayoutLMConfig, hidden_size=37) diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index 6c82a34a626..2b17eadff57 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -295,15 +295,10 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny # config. With new tiny model creation, it is available, but we need to fix the failed tests. return True - elif ( - pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests" - and tokenizer_name is not None - and not tokenizer_name.endswith("Fast") - ): - # This pipeline uses `sequence_ids()` which is only available for fast tokenizers. - return True - return False + return super().is_pipeline_test_to_skip( + pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name + ) def setUp(self): self.model_tester = LayoutLMv2ModelTester(self) diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py index 82a23a94b40..a73121966c3 100644 --- a/tests/test_pipeline_mixin.py +++ b/tests/test_pipeline_mixin.py @@ -428,9 +428,19 @@ class PipelineTesterMixin: def test_pipeline_zero_shot_object_detection(self): self.run_task_tests(task="zero-shot-object-detection") + # This contains the test cases to be skipped without model architecture being involved. def is_pipeline_test_to_skip( self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name ): + # No fix is required for this case. + if ( + pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests" + and tokenizer_name is not None + and not tokenizer_name.endswith("Fast") + ): + # `DocumentQuestionAnsweringPipelineTests` requires a fast tokenizer. + return True + return False