From 6494479f1de9fe16e9c6f89e52eb0cf81f864a7c Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Mon, 29 Jul 2024 17:29:11 +0800 Subject: [PATCH] make `p_mask` a numpy array before passing to `select_starts_ends` (#32076) * fix * bug fix * refine * fix --- src/transformers/pipelines/document_question_answering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index c840c14a719..aa4fb48aae6 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -378,7 +378,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer) # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens) # This logic mirrors the logic in the question_answering pipeline - p_mask = [[tok != 1 for tok in encoding.sequence_ids(span_id)] for span_id in range(num_spans)] + p_mask = np.array([[tok != 1 for tok in encoding.sequence_ids(span_id)] for span_id in range(num_spans)]) for span_idx in range(num_spans): if self.framework == "pt": span_encoding = {k: torch.tensor(v[span_idx : span_idx + 1]) for (k, v) in encoding.items()}