From 32b08742a58b43a5a905a28e434b8f67321be024 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 13 Apr 2023 17:22:59 +0200
Subject: [PATCH] =?UTF-8?q?`DocumentQuestionAnsweringPipeline`=20only=20fo?=
 =?UTF-8?q?r=20fast=20=E2=9A=A1=20tokenizers=20(#22745)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

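* Raise a `ValueError` in `DocumentQuestionAnsweringPipeline` when a slow tokenizer is provided, and move the matching pipeline-test skip into `PipelineTesterMixin`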

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 .../pipelines/document_question_answering.py       |  5 +++++
 tests/models/layoutlm/test_modeling_layoutlm.py    | 14 --------------
 .../models/layoutlmv2/test_modeling_layoutlmv2.py  | 11 +++--------
 tests/test_pipeline_mixin.py                       | 10 ++++++++++
 4 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
index 78f49a5e2da..936d728b598 100644
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -131,6 +131,11 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"):
+            raise ValueError(
+                "`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
+                f"(`{self.tokenizer.__class__.__name__}`) is provided."
+            )
 
         if self.model.config.__class__.__name__ == "VisionEncoderDecoderConfig":
             self.model_type = ModelType.VisionEncoderDecoder
diff --git a/tests/models/layoutlm/test_modeling_layoutlm.py b/tests/models/layoutlm/test_modeling_layoutlm.py
index d2aad061c38..0535fbf4e1f 100644
--- a/tests/models/layoutlm/test_modeling_layoutlm.py
+++ b/tests/models/layoutlm/test_modeling_layoutlm.py
@@ -246,20 +246,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     )
     fx_compatible = True
 
-    # TODO: Fix the failed tests
-    def is_pipeline_test_to_skip(
-        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
-    ):
-        if (
-            pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
-            and tokenizer_name is not None
-            and not tokenizer_name.endswith("Fast")
-        ):
-            # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
-            return True
-
-        return False
-
     def setUp(self):
         self.model_tester = LayoutLMModelTester(self)
         self.config_tester = ConfigTester(self, config_class=LayoutLMConfig, hidden_size=37)
diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
index 6c82a34a626..2b17eadff57 100644
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -295,15 +295,10 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
             # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
             # config. With new tiny model creation, it is available, but we need to fix the failed tests.
             return True
-        elif (
-            pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
-            and tokenizer_name is not None
-            and not tokenizer_name.endswith("Fast")
-        ):
-            # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
-            return True
 
-        return False
+        return super().is_pipeline_test_to_skip(
+            pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+        )
 
     def setUp(self):
         self.model_tester = LayoutLMv2ModelTester(self)
diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py
index 82a23a94b40..a73121966c3 100644
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -428,9 +428,19 @@ class PipelineTesterMixin:
     def test_pipeline_zero_shot_object_detection(self):
         self.run_task_tests(task="zero-shot-object-detection")
 
+    # Base skip rules: the test cases to be skipped regardless of the model architecture involved.
     def is_pipeline_test_to_skip(
         self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
     ):
+        # No fix is required for this case: the skip is by design, not a pending failure.
+        if (
+            pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
+            and tokenizer_name is not None
+            and not tokenizer_name.endswith("Fast")
+        ):
+            # `DocumentQuestionAnsweringPipeline` requires a fast tokenizer; skip its tests for slow ones.
+            return True
+
         return False