From 6494479f1de9fe16e9c6f89e52eb0cf81f864a7c Mon Sep 17 00:00:00 2001
From: Fanli Lin <fanli.lin@intel.com>
Date: Mon, 29 Jul 2024 17:29:11 +0800
Subject: [PATCH] make `p_mask` a numpy array before passing to
 `select_starts_ends` (#32076)

* fix

* bug fix

* refine

* fix
---
 src/transformers/pipelines/document_question_answering.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py
index c840c14a719..aa4fb48aae6 100644
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -378,7 +378,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
             # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
             # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
             # This logic mirrors the logic in the question_answering pipeline
-            p_mask = [[tok != 1 for tok in encoding.sequence_ids(span_id)] for span_id in range(num_spans)]
+            p_mask = np.array([[tok != 1 for tok in encoding.sequence_ids(span_id)] for span_id in range(num_spans)])
             for span_idx in range(num_spans):
                 if self.framework == "pt":
                     span_encoding = {k: torch.tensor(v[span_idx : span_idx + 1]) for (k, v) in encoding.items()}