Fixes #17128. (#17356)

VisibleDeprecationWarning is addressed by specifying dtype=object when creating numpy array. Update code based on review feedback. Undo whitespace changes to tokenization_utils_base.py. Co-authored-by: I like data <ilikedata@nym.hush.com>
2025-08-02 19:21:31 +06:00 · 2022-06-10 19:06:48 +05:30 · 2022-06-10 19:06:48 +05:30 · 35b16032cb
commit 35b16032cb
parent b88090914d
1 changed files with 5 additions and 10 deletions
--- a/src/transformers/pipelines/question_answering.py
+++ b/src/transformers/pipelines/question_answering.py
@ -279,7 +279,6 @@ class QuestionAnsweringPipeline(ChunkPipeline):
                truncation="only_second" if question_first else "only_first",
                max_length=max_seq_len,
                stride=doc_stride,
-                return_tensors="np",
                return_token_type_ids=True,
                return_overflowing_tokens=True,
                return_offsets_mapping=True,
@ -294,12 +293,10 @@ class QuestionAnsweringPipeline(ChunkPipeline):

            # p_mask: mask with 1 for tokens that cannot be in the answer (0 for tokens which can be in an answer)
            # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
-            p_mask = np.asarray(
-                [
-                    [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
-                    for span_id in range(num_spans)
-                ]
-            )
+            p_mask = [
+                [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
+                for span_id in range(num_spans)
+            ]

            features = []
            for span_idx in range(num_spans):
@ -316,8 +313,6 @@ class QuestionAnsweringPipeline(ChunkPipeline):
                    for cls_index in cls_indices:
                        p_mask[span_idx][cls_index] = 0
                submask = p_mask[span_idx]
-                if isinstance(submask, np.ndarray):
-                    submask = submask.tolist()
                features.append(
                    SquadFeatures(
                        input_ids=input_ids_span_idx,
@ -344,7 +339,7 @@ class QuestionAnsweringPipeline(ChunkPipeline):
        for i, feature in enumerate(features):
            fw_args = {}
            others = {}
-            model_input_names = self.tokenizer.model_input_names + ["p_mask"]
+            model_input_names = self.tokenizer.model_input_names + ["p_mask", "token_type_ids"]

            for k, v in feature.__dict__.items():
                if k in model_input_names: