Fixing object detection with layoutlm (#20776)

* Fixing object detection with layoutlm. * Fixup.
2025-08-02 19:21:31 +06:00 · 2022-12-15 18:46:43 +01:00 · 2022-12-15 18:46:43 +01:00 · fca66abe2a
commit fca66abe2a
parent 8891193e83
2 changed files with 9 additions and 12 deletions
--- a/src/transformers/pipelines/init.py
+++ b/src/transformers/pipelines/init.py
@ -378,7 +378,12 @@ NO_TOKENIZER_TASKS = set()
 # any tokenizer/feature_extractor might be use for a given model so we cannot
 # use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to
 # see if the model defines such objects or not.
-MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}
+MULTI_MODEL_CONFIGS = {
+    "SpeechEncoderDecoderConfig",
+    "VisionEncoderDecoderConfig",
+    "VisionTextDualEncoderConfig",
+    "LayoutLMConfig",
+}
 for task, values in SUPPORTED_TASKS.items():
    if values["type"] == "text":
        NO_FEATURE_EXTRACTOR_TASKS.add(task)
--- a/tests/pipelines/test_pipelines_object_detection.py
+++ b/tests/pipelines/test_pipelines_object_detection.py
@ -256,17 +256,9 @@ class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCase
            "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
        )
        self.assertEqual(
-            nested_simplify(outputs, decimals=4),
+            nested_simplify(outputs, decimals=3),
            [
-                {
-                    "score": 0.9982,
-                    "label": "B-QUESTION",
-                    "box": {"xmin": 654, "ymin": 165, "xmax": 719, "ymax": 719},
-                },
-                {
-                    "score": 0.9982,
-                    "label": "I-QUESTION",
-                    "box": {"xmin": 691, "ymin": 202, "xmax": 735, "ymax": 735},
-                },
+                {"score": 0.998, "label": "B-QUESTION", "box": {"xmin": 462, "ymin": 234, "xmax": 508, "ymax": 249}},
+                {"score": 0.999, "label": "I-QUESTION", "box": {"xmin": 489, "ymin": 286, "xmax": 519, "ymax": 301}},
            ],
        )