# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch Gemma2 model."""

import unittest

from transformers import AutoModelForCausalLM, AutoTokenizer, Gemma2Config, is_torch_available
from transformers.testing_utils import (
    require_read_token,
    require_torch,
    require_torch_gpu,
    slow,
    torch_device,
)

from ...models.gemma.test_modeling_gemma import GemmaModelTest, GemmaModelTester
from ...test_configuration_common import ConfigTester


if is_torch_available():
    import torch

    from transformers import (
        Gemma2ForCausalLM,
        Gemma2ForSequenceClassification,
        Gemma2ForTokenClassification,
        Gemma2Model,
    )


class Gemma2ModelTester(GemmaModelTester):
    # Reuses the Gemma tester, pointing it at the Gemma2 config and model classes
    config_class = Gemma2Config
    model_class = Gemma2Model
    for_causal_lm_class = Gemma2ForCausalLM
    for_sequence_class = Gemma2ForSequenceClassification
    for_token_class = Gemma2ForTokenClassification


@require_torch
class Gemma2ModelTest(GemmaModelTest, unittest.TestCase):
    all_model_classes = (
        (Gemma2Model, Gemma2ForCausalLM, Gemma2ForSequenceClassification, Gemma2ForTokenClassification)
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = ()
    pipeline_model_mapping = (
        {
            "feature-extraction": Gemma2Model,
            "text-classification": Gemma2ForSequenceClassification,
            "token-classification": Gemma2ForTokenClassification,
            "text-generation": Gemma2ForCausalLM,
            "zero-shot": Gemma2ForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
    test_headmasking = False
    test_pruning = False
    _is_stateful = True
    model_split_percents = [0.5, 0.6]
    _torch_compile_test_ckpt = "google/gemma-2-9b"

    def setUp(self):
        self.model_tester = Gemma2ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=Gemma2Config, hidden_size=37)

    @unittest.skip("Eager and SDPA do not produce the same outputs, thus this test fails")
    def test_model_outputs_equivalence(self, **kwargs):
        pass

    @unittest.skip("Gemma2's outputs are expected to be different")
    def test_eager_matches_sdpa_inference(self):
        pass


@slow
@require_torch_gpu
class Gemma2IntegrationTest(unittest.TestCase):
    input_text = ["Hello I am doing", "Hi today"]
    # This variable is used to determine which CUDA device we are using for our runners (A10 or T4).
    # Depending on the hardware we get different logits / generations.
    cuda_compute_capability_major_version = None

    @classmethod
    def setUpClass(cls):
        if is_torch_available() and torch.cuda.is_available():
            # 8 is for A100 / A10 and 7 for T4
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

    @require_read_token
    def test_model_2b_bf16(self):
        model_id = "google/gemma-2-9b"
        EXPECTED_TEXTS = [
            "Hello I am doing a project for a class and I am trying to use the ",
            "Hi today. So, I'm going to show you how to do a problem from the textbook. \nSo",
        ]

        model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
            torch_device
        )

        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)

        self.assertEqual(output_text, EXPECTED_TEXTS)

    @require_read_token
    def test_model_2b_fp16(self):
        model_id = "google/gemma-2-9b"
        EXPECTED_TEXTS = [
            "Hello I am doing a project on the effect of the temperature on the rate of a reaction. I am using a ",
            "Hi today I'm going to be talking about the 1000-4000-",
        ]

        model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.float16).to(
            torch_device
        )

        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)

        self.assertEqual(output_text, EXPECTED_TEXTS)