# Copyright 2020 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import unittest from transformers.data.processors.squad import SquadExample from transformers.pipelines import Pipeline, QuestionAnsweringArgumentHandler from .test_pipelines_common import CustomInputPipelineCommonMixin class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase): pipeline_task = "question-answering" pipeline_running_kwargs = { "padding": "max_length", "max_seq_len": 25, "doc_stride": 5, } # Default is 'longest' but we use 'max_length' to test equivalence between slow/fast tokenizers small_models = [ "sshleifer/tiny-distilbert-base-cased-distilled-squad" ] # Models tested without the @slow decorator large_models = [] # Models tested with the @slow decorator valid_inputs = [ {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."}, { "question": "In what field is HuggingFace working ?", "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.", }, { "question": ["In what field is HuggingFace working ?", "In what field is HuggingFace working ?"], "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.", }, { "question": ["In what field is HuggingFace working ?", "In what field is HuggingFace working ?"], "context": [ "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.", "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.", ], }, ] def _test_pipeline(self, nlp: Pipeline): output_keys = {"score", "answer", "start", "end"} valid_inputs = [ {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."}, { "question": "In what field is HuggingFace working ?", "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.", }, ] invalid_inputs = [ {"question": "", "context": "This is a test to try empty question edge case"}, {"question": None, "context": "This is a test to try empty question edge case"}, {"question": "What is does with empty context ?", "context": ""}, {"question": "What is does with empty context ?", "context": None}, ] self.assertIsNotNone(nlp) mono_result = nlp(valid_inputs[0]) self.assertIsInstance(mono_result, dict) for key in output_keys: self.assertIn(key, mono_result) multi_result = nlp(valid_inputs) self.assertIsInstance(multi_result, list) self.assertIsInstance(multi_result[0], dict) for result in multi_result: for key in output_keys: self.assertIn(key, result) for bad_input in invalid_inputs: self.assertRaises(ValueError, nlp, bad_input) self.assertRaises(ValueError, nlp, invalid_inputs) def test_argument_handler(self): qa = QuestionAnsweringArgumentHandler() Q = "Where was HuggingFace founded ?" C = "HuggingFace was founded in Paris" normalized = qa(Q, C) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa(question=Q, context=C) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa(question=Q, context=C) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa(question=[Q, Q], context=C) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 2) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa({"question": Q, "context": C}) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa([{"question": Q, "context": C}]) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa([{"question": Q, "context": C}, {"question": Q, "context": C}]) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 2) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa(X={"question": Q, "context": C}) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa(X=[{"question": Q, "context": C}]) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) normalized = qa(data={"question": Q, "context": C}) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 1) self.assertEqual({type(el) for el in normalized}, {SquadExample}) def test_argument_handler_error_handling(self): qa = QuestionAnsweringArgumentHandler() Q = "Where was HuggingFace founded ?" C = "HuggingFace was founded in Paris" with self.assertRaises(KeyError): qa({"context": C}) with self.assertRaises(KeyError): qa({"question": Q}) with self.assertRaises(KeyError): qa([{"context": C}]) with self.assertRaises(ValueError): qa(None, C) with self.assertRaises(ValueError): qa("", C) with self.assertRaises(ValueError): qa(Q, None) with self.assertRaises(ValueError): qa(Q, "") with self.assertRaises(ValueError): qa(question=None, context=C) with self.assertRaises(ValueError): qa(question="", context=C) with self.assertRaises(ValueError): qa(question=Q, context=None) with self.assertRaises(ValueError): qa(question=Q, context="") with self.assertRaises(ValueError): qa({"question": None, "context": C}) with self.assertRaises(ValueError): qa({"question": "", "context": C}) with self.assertRaises(ValueError): qa({"question": Q, "context": None}) with self.assertRaises(ValueError): qa({"question": Q, "context": ""}) with self.assertRaises(ValueError): qa([{"question": Q, "context": C}, {"question": None, "context": C}]) with self.assertRaises(ValueError): qa([{"question": Q, "context": C}, {"question": "", "context": C}]) with self.assertRaises(ValueError): qa([{"question": Q, "context": C}, {"question": Q, "context": None}]) with self.assertRaises(ValueError): qa([{"question": Q, "context": C}, {"question": Q, "context": ""}]) with self.assertRaises(ValueError): qa(question={"This": "Is weird"}, context="This is a context") with self.assertRaises(ValueError): qa(question=[Q, Q], context=[C, C, C]) with self.assertRaises(ValueError): qa(question=[Q, Q, Q], context=[C, C]) def test_argument_handler_old_format(self): qa = QuestionAnsweringArgumentHandler() Q = "Where was HuggingFace founded ?" C = "HuggingFace was founded in Paris" # Backward compatibility for this normalized = qa(question=[Q, Q], context=[C, C]) self.assertEqual(type(normalized), list) self.assertEqual(len(normalized), 2) self.assertEqual({type(el) for el in normalized}, {SquadExample}) def test_argument_handler_error_handling_odd(self): qa = QuestionAnsweringArgumentHandler() with self.assertRaises(ValueError): qa(None) with self.assertRaises(ValueError): qa(Y=None) with self.assertRaises(ValueError): qa(1)