import unittest

from transformers import pipeline
from transformers.testing_utils import require_tf, require_torch, slow

from .test_pipelines_common import MonoInputPipelineCommonMixin


EXPECTED_FILL_MASK_RESULT = [
    [
        {"sequence": "My name is John", "score": 0.00782308354973793, "token": 610, "token_str": "ĠJohn"},
        {"sequence": "My name is Chris", "score": 0.007475061342120171, "token": 1573, "token_str": "ĠChris"},
    ],
    [
        {"sequence": "The largest city in France is Paris", "score": 0.3185044229030609, "token": 2201},
        {"sequence": "The largest city in France is Lyon", "score": 0.21112334728240967, "token": 12790},
    ],
]

EXPECTED_FILL_MASK_TARGET_RESULT = [
    [
        {
            "sequence": "My name is Patrick",
            "score": 0.004992353264242411,
            "token": 3499,
            "token_str": "ĠPatrick",
        },
        {
            "sequence": "My name is Clara",
            "score": 0.00019297805556561798,
            "token": 13606,
            "token_str": "ĠClara",
        },
    ]
]


class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
    pipeline_task = "fill-mask"
    pipeline_loading_kwargs = {"topk": 2}
    small_models = ["sshleifer/tiny-distilroberta-base"]  # Models tested without the @slow decorator
    large_models = ["distilroberta-base"]  # Models tested with the @slow decorator
    mandatory_keys = {"sequence", "score", "token"}
    valid_inputs = [
        "My name is <mask>",
        "The largest city in France is <mask>",
    ]
    invalid_inputs = [
        "This is <mask> <mask>",  # More than 1 mask_token in the input is not supported
        "This is",  # No mask_token is not supported
    ]
    expected_check_keys = ["sequence"]

    @require_torch
    def test_torch_fill_mask_with_targets(self):
        valid_inputs = ["My name is <mask>"]
        valid_targets = [[" Teven", " Patrick", " Clara"], [" Sam"]]
        invalid_targets = [[], [""], ""]
        for model_name in self.small_models:
            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt")
            for targets in valid_targets:
                outputs = nlp(valid_inputs, targets=targets)
                self.assertIsInstance(outputs, list)
                self.assertEqual(len(outputs), len(targets))
            for targets in invalid_targets:
                self.assertRaises(ValueError, nlp, valid_inputs, targets=targets)

    @require_tf
    def test_tf_fill_mask_with_targets(self):
        valid_inputs = ["My name is <mask>"]
        valid_targets = [[" Teven", " Patrick", " Clara"], [" Sam"]]
        invalid_targets = [[], [""], ""]
        for model_name in self.small_models:
            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf")
            for targets in valid_targets:
                outputs = nlp(valid_inputs, targets=targets)
                self.assertIsInstance(outputs, list)
                self.assertEqual(len(outputs), len(targets))
            for targets in invalid_targets:
                self.assertRaises(ValueError, nlp, valid_inputs, targets=targets)

    @require_torch
    @slow
    def test_torch_fill_mask_results(self):
        mandatory_keys = {"sequence", "score", "token"}
        valid_inputs = [
            "My name is <mask>",
            "The largest city in France is <mask>",
        ]
        valid_targets = [" Patrick", " Clara"]
        for model_name in self.large_models:
            nlp = pipeline(
                task="fill-mask",
                model=model_name,
                tokenizer=model_name,
                framework="pt",
                topk=2,
            )

            self._test_mono_column_pipeline(
                nlp,
                valid_inputs,
                mandatory_keys,
                expected_multi_result=EXPECTED_FILL_MASK_RESULT,
                expected_check_keys=["sequence"],
            )
            self._test_mono_column_pipeline(
                nlp,
                valid_inputs[:1],
                mandatory_keys,
                expected_multi_result=EXPECTED_FILL_MASK_TARGET_RESULT,
                expected_check_keys=["sequence"],
                targets=valid_targets,
            )

    @require_tf
    @slow
    def test_tf_fill_mask_results(self):
        mandatory_keys = {"sequence", "score", "token"}
        valid_inputs = [
            "My name is <mask>",
            "The largest city in France is <mask>",
        ]
        valid_targets = [" Patrick", " Clara"]
        for model_name in self.large_models:
            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", topk=2)

            self._test_mono_column_pipeline(
                nlp,
                valid_inputs,
                mandatory_keys,
                expected_multi_result=EXPECTED_FILL_MASK_RESULT,
                expected_check_keys=["sequence"],
            )
            self._test_mono_column_pipeline(
                nlp,
                valid_inputs[:1],
                mandatory_keys,
                expected_multi_result=EXPECTED_FILL_MASK_TARGET_RESULT,
                expected_check_keys=["sequence"],
                targets=valid_targets,
            )