mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 13:20:12 +06:00
DocumentQuestionAnsweringPipeline
only for fast ⚡ tokenizers (#22745)
* fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
4def2fe969
commit
32b08742a5
@ -131,6 +131,11 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"):
|
||||
raise ValueError(
|
||||
"`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
|
||||
f"(`{self.tokenizer.__class__.__name__}`) is provided."
|
||||
)
|
||||
|
||||
if self.model.config.__class__.__name__ == "VisionEncoderDecoderConfig":
|
||||
self.model_type = ModelType.VisionEncoderDecoder
|
||||
|
@ -246,20 +246,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
)
|
||||
fx_compatible = True
|
||||
|
||||
# TODO: Fix the failed tests
|
||||
def is_pipeline_test_to_skip(
    self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
    """Tell whether a pipeline test case should be skipped for this model test class.

    Only `pipeline_test_casse_name` and `tokenizer_name` are consulted; the
    remaining parameters are accepted but not read by this check.

    Args:
        pipeline_test_casse_name: Name of the pipeline test class being considered.
        config_class: Not read by this check.
        model_architecture: Not read by this check.
        tokenizer_name: Tokenizer class name as a string, or `None`.
        processor_name: Not read by this check.

    Returns:
        `True` when the test case is `DocumentQuestionAnsweringPipelineTests` and a
        tokenizer name is given that does not end with `"Fast"`; `False` otherwise.
    """
    if (
        pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
        and tokenizer_name is not None
        and not tokenizer_name.endswith("Fast")
    ):
        # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
        return True

    return False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = LayoutLMModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class=LayoutLMConfig, hidden_size=37)
|
||||
|
@ -295,15 +295,10 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
|
||||
# `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
|
||||
# config. With new tiny model creation, it is available, but we need to fix the failed tests.
|
||||
return True
|
||||
elif (
|
||||
pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
|
||||
and tokenizer_name is not None
|
||||
and not tokenizer_name.endswith("Fast")
|
||||
):
|
||||
# This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
|
||||
return True
|
||||
|
||||
return False
|
||||
return super().is_pipeline_test_to_skip(
|
||||
pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
|
||||
)
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = LayoutLMv2ModelTester(self)
|
||||
|
@ -428,9 +428,19 @@ class PipelineTesterMixin:
|
||||
def test_pipeline_zero_shot_object_detection(self):
    """Invoke `run_task_tests` for the `zero-shot-object-detection` task."""
    self.run_task_tests(task="zero-shot-object-detection")
|
||||
|
||||
# This contains the test cases to be skipped without model architecture being involved.
|
||||
def is_pipeline_test_to_skip(
    self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
    """Tell whether a pipeline test case should be skipped, independent of model architecture.

    Only `pipeline_test_casse_name` and `tokenizer_name` are consulted; the
    remaining parameters are accepted but not read by this check.

    Args:
        pipeline_test_casse_name: Name of the pipeline test class being considered.
        config_class: Not read by this check.
        model_architecture: Not read by this check.
        tokenizer_name: Tokenizer class name as a string, or `None`.
        processor_name: Not read by this check.

    Returns:
        `True` when the test case is `DocumentQuestionAnsweringPipelineTests` and a
        tokenizer name is given that does not end with `"Fast"`; `False` otherwise.
    """
    # No fix is required for this case.
    if (
        pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
        and tokenizer_name is not None
        and not tokenizer_name.endswith("Fast")
    ):
        # `DocumentQuestionAnsweringPipelineTests` requires a fast tokenizer.
        return True

    return False
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user