transformers/tests/models/helium/test_modeling_helium.py

# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testing suite for the PyTorch Helium model."""

import unittest

from transformers import AutoModelForCausalLM, AutoTokenizer, HeliumConfig, is_torch_available
from transformers.testing_utils import (
    require_read_token,
    require_torch,
    slow,
    torch_device,
)

from ...test_configuration_common import ConfigTester
from ..gemma.test_modeling_gemma import GemmaModelTest, GemmaModelTester


if is_torch_available():
    import torch

    from transformers import (
        HeliumForCausalLM,
        HeliumForSequenceClassification,
        HeliumForTokenClassification,
        HeliumModel,
    )


class HeliumModelTester(GemmaModelTester):
    if is_torch_available():
        config_class = HeliumConfig
        model_class = HeliumModel
        for_causal_lm_class = HeliumForCausalLM
        for_sequence_class = HeliumForSequenceClassification
        for_token_class = HeliumForTokenClassification


@require_torch
class HeliumModelTest(GemmaModelTest, unittest.TestCase):
    all_model_classes = (
        (HeliumModel, HeliumForCausalLM, HeliumForSequenceClassification, HeliumForTokenClassification)
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (HeliumForCausalLM,) if is_torch_available() else ()
    pipeline_model_mapping = (
        {
            "feature-extraction": HeliumModel,
            "text-classification": HeliumForSequenceClassification,
            "token-classification": HeliumForTokenClassification,
            "text-generation": HeliumForCausalLM,
            "zero-shot": HeliumForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
    test_headmasking = False
    test_pruning = False
    _is_stateful = True
    model_split_percents = [0.5, 0.6]

    def setUp(self):
        self.model_tester = HeliumModelTester(self)
        self.config_tester = ConfigTester(self, config_class=HeliumConfig, hidden_size=37)


@slow
# @require_torch_gpu
class HeliumIntegrationTest(unittest.TestCase):
    input_text = ["Hello, today is a great day to"]
    # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
    # Depending on the hardware we get different logits / generations
    cuda_compute_capability_major_version = None

    @classmethod
    def setUpClass(cls):
        if is_torch_available() and torch.cuda.is_available():
            # 8 is for A100 / A10 and 7 for T4
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

    @require_read_token
    def test_model_2b(self):
        model_id = "kyutai/helium-1-preview"
        EXPECTED_TEXTS = [
            "Hello, today is a great day to start a new project. I have been working on a new project for a while now and I have"
        ]

        model = AutoModelForCausalLM.from_pretrained(
            model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, revision="refs/pr/1"
        ).to(torch_device)
        tokenizer = AutoTokenizer.from_pretrained(model_id, revision="refs/pr/1")
        inputs = tokenizer(self.input_text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)

        self.assertEqual(output_text, EXPECTED_TEXTS)