From fd3eb3e3cd62f1a078aadba791d03d042678313e Mon Sep 17 00:00:00 2001
From: Joao Gante
Date: Wed, 22 Mar 2023 15:20:48 +0000
Subject: [PATCH] Beef up Llama tests (#22314)

* tmp commit

* beef up llama tests
---
 tests/generation/test_utils.py            |  2 +-
 tests/models/llama/test_modeling_llama.py | 31 ++++++++++-------------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index de319c2b000..a269b68d002 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -1463,10 +1463,10 @@ class GenerationTesterMixin:
         attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"]
         for model_class in self.all_generative_model_classes:
             config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
-            model = model_class(config).to(torch_device)
             # We want to test only encoder-decoder models
             if not config.is_encoder_decoder:
                 continue
+            model = model_class(config).to(torch_device)

             head_masking = {
                 "head_mask": torch.zeros(config.encoder_layers, config.encoder_attention_heads, device=torch_device),
diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index dea92d5111f..113f74d3097 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -20,8 +20,10 @@ import unittest
 from transformers import LlamaConfig, is_torch_available
 from transformers.testing_utils import require_torch, torch_device

+from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
+from ...test_pipeline_mixin import PipelineTesterMixin


 if is_torch_available():
@@ -254,10 +256,21 @@ class LlamaModelTester:


 @require_torch
-class LlamaModelTest(ModelTesterMixin, unittest.TestCase):
+class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (LlamaModel, LlamaForCausalLM, LlamaForSequenceClassification) if is_torch_available() else ()
     all_generative_model_classes = (LlamaForCausalLM,) if is_torch_available() else ()
+    pipeline_model_mapping = (
+        {
+            "feature-extraction": LlamaModel,
+            "text-classification": LlamaForSequenceClassification,
+            "text-generation": LlamaForCausalLM,
+            "zero-shot": LlamaForSequenceClassification,
+        }
+        if is_torch_available()
+        else {}
+    )
     test_headmasking = False
+    test_pruning = False

     def setUp(self):
         self.model_tester = LlamaModelTester(self)
@@ -316,22 +329,6 @@ class LlamaModelTest(ModelTesterMixin, unittest.TestCase):
         result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
         self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))

-    @unittest.skip("LLaMA does not support head pruning.")
-    def test_head_pruning(self):
-        pass
-
-    @unittest.skip("LLaMA does not support head pruning.")
-    def test_head_pruning_integration(self):
-        pass
-
-    @unittest.skip("LLaMA does not support head pruning.")
-    def test_head_pruning_save_load_from_config_init(self):
-        pass
-
-    @unittest.skip("LLaMA does not support head pruning.")
-    def test_head_pruning_save_load_from_pretrained(self):
-        pass
-
     @unittest.skip("LLaMA buffers include complex numbers, which breaks this test")
     def test_save_load_fast_init_from_base(self):
         pass
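
Below is a minimal, self-contained sketch of the flag-gating idea that the new
`test_pruning = False` attribute relies on: the shared tester mixin checks a
class-level flag and skips its pruning tests, so the four explicit
`@unittest.skip` overrides removed in the patch are no longer needed. The class
and method names here are illustrative placeholders, not the actual
`ModelTesterMixin` code.

# Illustrative sketch only -- not the real transformers ModelTesterMixin.
import unittest


class PruningTesterMixinSketch:
    # Subclasses flip this class-level flag to opt out of all pruning tests.
    test_pruning = True

    def test_head_pruning(self):
        if not self.test_pruning:
            self.skipTest("Head pruning is not supported by this model.")
        # The shared pruning assertions would run here for models that support it.


class LlamaStyleModelTestSketch(PruningTesterMixinSketch, unittest.TestCase):
    # Mirrors the `test_pruning = False` line added in the patch: one flag
    # replaces four per-test @unittest.skip overrides.
    test_pruning = False


if __name__ == "__main__":
    unittest.main()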