transformers/tests/peft_integration/test_peft_integration.py
Benjamin Bossan 1a9188a54e
FIX: Faulty PEFT tests (#37757)
Two PEFT tests are actually failing:

tests/peft_integration/test_peft_integration.py::PeftIntegrationTester::test_delete_adapter
tests/peft_integration/test_peft_integration.py::PeftIntegrationTester::test_peft_pipeline_no_warning

This must have been going on for some time but was apparently never
noticed. The cause is that the tests themselves are faulty, the PEFT
integration is correct in these cases.

test_delete_adapter

The first faulty test was introduced by #34650. AFAICT, it should never
have passed in the first place, the PEFT integration logic was not
changed in the meantime. At this point, the logs for the PR CI are gone,
so I'm not sure if the test passed back then or not.

test_peft_pipeline_no_warning

This test was introduced in #36783 and should also never have passed, as
the self.assertNoLogs context manager only returns None, thus the assert
should never have worked (mea culpa for suggesting this code snippet).
Here too, the CI logs are deleted by now, so I can't check if the test
already failed back then.
2025-04-28 15:10:46 +02:00

862 lines
39 KiB
Python

# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import os
import re
import tempfile
import unittest
from datasets import Dataset, DatasetDict
from huggingface_hub import hf_hub_download
from packaging import version
from transformers import (
AutoModelForCausalLM,
AutoModelForSequenceClassification,
AutoTokenizer,
OPTForCausalLM,
Trainer,
TrainingArguments,
logging,
)
from transformers.testing_utils import (
CaptureLogger,
require_bitsandbytes,
require_peft,
require_torch,
require_torch_accelerator,
slow,
torch_device,
)
from transformers.utils import check_torch_load_is_safe, is_torch_available
if is_torch_available():
import torch
@require_peft
@require_torch
class PeftTesterMixin:
peft_test_model_ids = ("peft-internal-testing/tiny-OPTForCausalLM-lora",)
transformers_test_model_ids = ("hf-internal-testing/tiny-random-OPTForCausalLM",)
transformers_test_model_classes = (AutoModelForCausalLM, OPTForCausalLM)
# TODO: run it with CI after PEFT release.
@slow
class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin):
"""
A testing suite that makes sure that the PeftModel class is correctly integrated into the transformers library.
"""
def _check_lora_correctly_converted(self, model):
"""
Utility method to check if the model has correctly adapters injected on it.
"""
from peft.tuners.tuners_utils import BaseTunerLayer
is_peft_loaded = False
for _, m in model.named_modules():
if isinstance(m, BaseTunerLayer):
is_peft_loaded = True
break
return is_peft_loaded
def test_peft_from_pretrained(self):
"""
Simple test that tests the basic usage of PEFT model through `from_pretrained`.
This checks if we pass a remote folder that contains an adapter config and adapter weights, it
should correctly load a model that has adapters injected on it.
"""
logger = logging.get_logger("transformers.integrations.peft")
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
with CaptureLogger(logger) as cl:
peft_model = transformers_class.from_pretrained(model_id).to(torch_device)
# ensure that under normal circumstances, there are no warnings about keys
self.assertNotIn("unexpected keys", cl.out)
self.assertNotIn("missing keys", cl.out)
self.assertTrue(self._check_lora_correctly_converted(peft_model))
self.assertTrue(peft_model._hf_peft_config_loaded)
# dummy generation
_ = peft_model.generate(input_ids=torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device))
def test_peft_state_dict(self):
"""
Simple test that checks if the returned state dict of `get_adapter_state_dict()` method contains
the expected keys.
"""
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id).to(torch_device)
state_dict = peft_model.get_adapter_state_dict()
for key in state_dict.keys():
self.assertTrue("lora" in key)
def test_peft_save_pretrained(self):
"""
Test that checks various combinations of `save_pretrained` with a model that has adapters loaded
on it. This checks if the saved model contains the expected files (adapter weights and adapter config).
"""
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id).to(torch_device)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname)
self.assertTrue("adapter_model.safetensors" in os.listdir(tmpdirname))
self.assertTrue("adapter_config.json" in os.listdir(tmpdirname))
self.assertTrue("config.json" not in os.listdir(tmpdirname))
self.assertTrue("pytorch_model.bin" not in os.listdir(tmpdirname))
self.assertTrue("model.safetensors" not in os.listdir(tmpdirname))
peft_model = transformers_class.from_pretrained(tmpdirname).to(torch_device)
self.assertTrue(self._check_lora_correctly_converted(peft_model))
peft_model.save_pretrained(tmpdirname, safe_serialization=False)
self.assertTrue("adapter_model.bin" in os.listdir(tmpdirname))
self.assertTrue("adapter_config.json" in os.listdir(tmpdirname))
peft_model = transformers_class.from_pretrained(tmpdirname).to(torch_device)
self.assertTrue(self._check_lora_correctly_converted(peft_model))
def test_peft_enable_disable_adapters(self):
"""
A test that checks if `enable_adapters` and `disable_adapters` methods work as expected.
"""
from peft import LoraConfig
dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig(init_lora_weights=False)
peft_model.add_adapter(peft_config)
peft_logits = peft_model(dummy_input).logits
peft_model.disable_adapters()
peft_logits_disabled = peft_model(dummy_input).logits
peft_model.enable_adapters()
peft_logits_enabled = peft_model(dummy_input).logits
torch.testing.assert_close(peft_logits, peft_logits_enabled, rtol=1e-12, atol=1e-12)
self.assertFalse(torch.allclose(peft_logits_enabled, peft_logits_disabled, atol=1e-12, rtol=1e-12))
def test_peft_add_adapter(self):
"""
Simple test that tests if `add_adapter` works as expected
"""
from peft import LoraConfig
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig(init_lora_weights=False)
model.add_adapter(peft_config)
self.assertTrue(self._check_lora_correctly_converted(model))
# dummy generation
_ = model.generate(input_ids=torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device))
def test_peft_add_adapter_from_pretrained(self):
"""
Simple test that tests if `add_adapter` works as expected
"""
from peft import LoraConfig
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig(init_lora_weights=False)
model.add_adapter(peft_config)
self.assertTrue(self._check_lora_correctly_converted(model))
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model_from_pretrained = transformers_class.from_pretrained(tmpdirname).to(torch_device)
self.assertTrue(self._check_lora_correctly_converted(model_from_pretrained))
def test_peft_add_adapter_modules_to_save(self):
"""
Simple test that tests if `add_adapter` works as expected when training with
modules to save.
"""
from peft import LoraConfig
from peft.utils import ModulesToSaveWrapper
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig(init_lora_weights=False, modules_to_save=["lm_head"])
model.add_adapter(peft_config)
self._check_lora_correctly_converted(model)
_has_modules_to_save_wrapper = False
for name, module in model.named_modules():
if isinstance(module, ModulesToSaveWrapper):
_has_modules_to_save_wrapper = True
self.assertTrue(module.modules_to_save.default.weight.requires_grad)
self.assertTrue("lm_head" in name)
break
self.assertTrue(_has_modules_to_save_wrapper)
state_dict = model.get_adapter_state_dict()
self.assertTrue("lm_head.weight" in state_dict.keys())
logits = model(dummy_input).logits
loss = logits.mean()
loss.backward()
for _, param in model.named_parameters():
if param.requires_grad:
self.assertTrue(param.grad is not None)
def test_peft_add_adapter_training_gradient_checkpointing(self):
"""
Simple test that tests if `add_adapter` works as expected when training with
gradient checkpointing.
"""
from peft import LoraConfig
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig(init_lora_weights=False)
model.add_adapter(peft_config)
self.assertTrue(self._check_lora_correctly_converted(model))
# When attaching adapters the input embeddings will stay frozen, this will
# lead to the output embedding having requires_grad=False.
dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
frozen_output = model.get_input_embeddings()(dummy_input)
self.assertTrue(frozen_output.requires_grad is False)
model.gradient_checkpointing_enable()
# Since here we attached the hook, the input should have requires_grad to set
# properly
non_frozen_output = model.get_input_embeddings()(dummy_input)
self.assertTrue(non_frozen_output.requires_grad is True)
# To repro the Trainer issue
dummy_input.requires_grad = False
for name, param in model.named_parameters():
if "lora" in name.lower():
self.assertTrue(param.requires_grad)
logits = model(dummy_input).logits
loss = logits.mean()
loss.backward()
for name, param in model.named_parameters():
if param.requires_grad:
self.assertTrue("lora" in name.lower())
self.assertTrue(param.grad is not None)
def test_peft_add_multi_adapter(self):
"""
Simple test that tests the basic usage of PEFT model through `from_pretrained`. This test tests if
add_adapter works as expected in multi-adapter setting.
"""
from peft import LoraConfig
from peft.tuners.tuners_utils import BaseTunerLayer
dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
is_peft_loaded = False
model = transformers_class.from_pretrained(model_id).to(torch_device)
logits_original_model = model(dummy_input).logits
peft_config = LoraConfig(init_lora_weights=False)
model.add_adapter(peft_config)
logits_adapter_1 = model(dummy_input)
model.add_adapter(peft_config, adapter_name="adapter-2")
logits_adapter_2 = model(dummy_input)
for _, m in model.named_modules():
if isinstance(m, BaseTunerLayer):
is_peft_loaded = True
break
self.assertTrue(is_peft_loaded)
# dummy generation
_ = model.generate(input_ids=dummy_input)
model.set_adapter("default")
self.assertTrue(model.active_adapters() == ["default"])
self.assertTrue(model.active_adapter() == "default")
model.set_adapter("adapter-2")
self.assertTrue(model.active_adapters() == ["adapter-2"])
self.assertTrue(model.active_adapter() == "adapter-2")
# Logits comparison
self.assertFalse(
torch.allclose(logits_adapter_1.logits, logits_adapter_2.logits, atol=1e-6, rtol=1e-6)
)
self.assertFalse(torch.allclose(logits_original_model, logits_adapter_2.logits, atol=1e-6, rtol=1e-6))
model.set_adapter(["adapter-2", "default"])
self.assertTrue(model.active_adapters() == ["adapter-2", "default"])
self.assertTrue(model.active_adapter() == "adapter-2")
logits_adapter_mixed = model(dummy_input)
self.assertFalse(
torch.allclose(logits_adapter_1.logits, logits_adapter_mixed.logits, atol=1e-6, rtol=1e-6)
)
self.assertFalse(
torch.allclose(logits_adapter_2.logits, logits_adapter_mixed.logits, atol=1e-6, rtol=1e-6)
)
# multi active adapter saving not supported
with self.assertRaises(ValueError), tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
def test_delete_adapter(self):
"""
Enhanced test for `delete_adapter` to handle multiple adapters,
edge cases, and proper error handling.
"""
from peft import LoraConfig
for model_id in self.transformers_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
# Add multiple adapters
peft_config_1 = LoraConfig(init_lora_weights=False)
peft_config_2 = LoraConfig(init_lora_weights=False)
model.add_adapter(peft_config_1, adapter_name="adapter_1")
model.add_adapter(peft_config_2, adapter_name="adapter_2")
# Ensure adapters were added
self.assertIn("adapter_1", model.peft_config)
self.assertIn("adapter_2", model.peft_config)
# Delete a single adapter
model.delete_adapter("adapter_1")
self.assertNotIn("adapter_1", model.peft_config)
self.assertIn("adapter_2", model.peft_config)
# Delete remaining adapter
model.delete_adapter("adapter_2")
self.assertFalse(hasattr(model, "peft_config"))
self.assertFalse(model._hf_peft_config_loaded)
# Re-add adapters for edge case tests
model.add_adapter(peft_config_1, adapter_name="adapter_1")
model.add_adapter(peft_config_2, adapter_name="adapter_2")
# Attempt to delete multiple adapters at once
model.delete_adapter(["adapter_1", "adapter_2"])
self.assertFalse(hasattr(model, "peft_config"))
self.assertFalse(model._hf_peft_config_loaded)
# Test edge cases
msg = re.escape("No adapter loaded. Please load an adapter first.")
with self.assertRaisesRegex(ValueError, msg):
model.delete_adapter("nonexistent_adapter")
model.add_adapter(peft_config_1, adapter_name="adapter_1")
with self.assertRaisesRegex(ValueError, "The following adapter\\(s\\) are not present"):
model.delete_adapter("nonexistent_adapter")
with self.assertRaisesRegex(ValueError, "The following adapter\\(s\\) are not present"):
model.delete_adapter(["adapter_1", "nonexistent_adapter"])
# Deleting with an empty list or None should not raise errors
model.add_adapter(peft_config_2, adapter_name="adapter_2")
model.delete_adapter([]) # No-op
self.assertIn("adapter_1", model.peft_config)
self.assertIn("adapter_2", model.peft_config)
# Deleting duplicate adapter names in the list
model.delete_adapter(["adapter_1", "adapter_1"])
self.assertNotIn("adapter_1", model.peft_config)
self.assertIn("adapter_2", model.peft_config)
@require_torch_accelerator
@require_bitsandbytes
def test_peft_from_pretrained_kwargs(self):
"""
Simple test that tests the basic usage of PEFT model through `from_pretrained` + additional kwargs
and see if the integraiton behaves as expected.
"""
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
module = peft_model.model.decoder.layers[0].self_attn.v_proj
self.assertTrue(module.__class__.__name__ == "Linear8bitLt")
self.assertTrue(peft_model.hf_device_map is not None)
# dummy generation
_ = peft_model.generate(input_ids=torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device))
@require_torch_accelerator
@require_bitsandbytes
def test_peft_save_quantized(self):
"""
Simple test that tests the basic usage of PEFT model save_pretrained with quantized base models
"""
# 4bit
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
module = peft_model.model.decoder.layers[0].self_attn.v_proj
self.assertTrue(module.__class__.__name__ == "Linear4bit")
self.assertTrue(peft_model.hf_device_map is not None)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname)
self.assertTrue("adapter_model.safetensors" in os.listdir(tmpdirname))
self.assertTrue("adapter_config.json" in os.listdir(tmpdirname))
self.assertTrue("pytorch_model.bin" not in os.listdir(tmpdirname))
self.assertTrue("model.safetensors" not in os.listdir(tmpdirname))
# 8-bit
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
module = peft_model.model.decoder.layers[0].self_attn.v_proj
self.assertTrue(module.__class__.__name__ == "Linear8bitLt")
self.assertTrue(peft_model.hf_device_map is not None)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname)
self.assertTrue("adapter_model.safetensors" in os.listdir(tmpdirname))
self.assertTrue("adapter_config.json" in os.listdir(tmpdirname))
self.assertTrue("pytorch_model.bin" not in os.listdir(tmpdirname))
self.assertTrue("model.safetensors" not in os.listdir(tmpdirname))
@require_torch_accelerator
@require_bitsandbytes
def test_peft_save_quantized_regression(self):
"""
Simple test that tests the basic usage of PEFT model save_pretrained with quantized base models
Regression test to make sure everything works as expected before the safetensors integration.
"""
# 4bit
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
module = peft_model.model.decoder.layers[0].self_attn.v_proj
self.assertTrue(module.__class__.__name__ == "Linear4bit")
self.assertTrue(peft_model.hf_device_map is not None)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname, safe_serialization=False)
self.assertTrue("adapter_model.bin" in os.listdir(tmpdirname))
self.assertTrue("adapter_config.json" in os.listdir(tmpdirname))
self.assertTrue("pytorch_model.bin" not in os.listdir(tmpdirname))
self.assertTrue("model.safetensors" not in os.listdir(tmpdirname))
# 8-bit
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
module = peft_model.model.decoder.layers[0].self_attn.v_proj
self.assertTrue(module.__class__.__name__ == "Linear8bitLt")
self.assertTrue(peft_model.hf_device_map is not None)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname, safe_serialization=False)
self.assertTrue("adapter_model.bin" in os.listdir(tmpdirname))
self.assertTrue("adapter_config.json" in os.listdir(tmpdirname))
self.assertTrue("pytorch_model.bin" not in os.listdir(tmpdirname))
self.assertTrue("model.safetensors" not in os.listdir(tmpdirname))
def test_peft_pipeline(self):
"""
Simple test that tests the basic usage of PEFT model + pipeline
"""
from transformers import pipeline
for adapter_id, base_model_id in zip(self.peft_test_model_ids, self.transformers_test_model_ids):
peft_pipe = pipeline("text-generation", adapter_id)
base_pipe = pipeline("text-generation", base_model_id)
peft_params = list(peft_pipe.model.parameters())
base_params = list(base_pipe.model.parameters())
self.assertNotEqual(len(peft_params), len(base_params)) # Assert we actually loaded the adapter too
_ = peft_pipe("Hello")
def test_peft_add_adapter_with_state_dict(self):
"""
Simple test that tests the basic usage of PEFT model through `from_pretrained`. This test tests if
add_adapter works as expected with a state_dict being passed.
"""
from peft import LoraConfig
dummy_input = torch.LongTensor([[0, 1, 2, 3, 4, 5, 6, 7]]).to(torch_device)
for model_id, peft_model_id in zip(self.transformers_test_model_ids, self.peft_test_model_ids):
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig(init_lora_weights=False)
with self.assertRaises(ValueError):
model.load_adapter(peft_model_id=None)
state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin")
check_torch_load_is_safe()
dummy_state_dict = torch.load(state_dict_path, weights_only=True)
model.load_adapter(adapter_state_dict=dummy_state_dict, peft_config=peft_config)
with self.assertRaises(ValueError):
model.load_adapter(model.load_adapter(adapter_state_dict=dummy_state_dict, peft_config=None))
self.assertTrue(self._check_lora_correctly_converted(model))
# dummy generation
_ = model.generate(input_ids=dummy_input)
def test_peft_add_adapter_with_state_dict_low_cpu_mem_usage(self):
"""
Check the usage of low_cpu_mem_usage, which is supported in PEFT >= 0.13.0
"""
from peft import LoraConfig
min_version_lcmu = "0.13.0"
is_lcmu_supported = version.parse(importlib.metadata.version("peft")) >= version.parse(min_version_lcmu)
for model_id, peft_model_id in zip(self.transformers_test_model_ids, self.peft_test_model_ids):
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig()
state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin")
check_torch_load_is_safe()
dummy_state_dict = torch.load(state_dict_path, weights_only=True)
# this should always work
model.load_adapter(
adapter_state_dict=dummy_state_dict, peft_config=peft_config, low_cpu_mem_usage=False
)
if is_lcmu_supported:
# if supported, this should not raise an error
model.load_adapter(
adapter_state_dict=dummy_state_dict,
adapter_name="other",
peft_config=peft_config,
low_cpu_mem_usage=True,
)
# after loading, no meta device should be remaining
self.assertFalse(any((p.device.type == "meta") for p in model.parameters()))
else:
err_msg = r"The version of PEFT you are using does not support `low_cpu_mem_usage` yet"
with self.assertRaisesRegex(ValueError, err_msg):
model.load_adapter(
adapter_state_dict=dummy_state_dict,
adapter_name="other",
peft_config=peft_config,
low_cpu_mem_usage=True,
)
def test_peft_from_pretrained_hub_kwargs(self):
"""
Tests different combinations of PEFT model + from_pretrained + hub kwargs
"""
peft_model_id = "peft-internal-testing/tiny-opt-lora-revision"
# This should not work
with self.assertRaises(OSError):
_ = AutoModelForCausalLM.from_pretrained(peft_model_id)
adapter_kwargs = {"revision": "test"}
# This should work
model = AutoModelForCausalLM.from_pretrained(peft_model_id, adapter_kwargs=adapter_kwargs)
self.assertTrue(self._check_lora_correctly_converted(model))
model = OPTForCausalLM.from_pretrained(peft_model_id, adapter_kwargs=adapter_kwargs)
self.assertTrue(self._check_lora_correctly_converted(model))
adapter_kwargs = {"revision": "main", "subfolder": "test_subfolder"}
model = AutoModelForCausalLM.from_pretrained(peft_model_id, adapter_kwargs=adapter_kwargs)
self.assertTrue(self._check_lora_correctly_converted(model))
model = OPTForCausalLM.from_pretrained(peft_model_id, adapter_kwargs=adapter_kwargs)
self.assertTrue(self._check_lora_correctly_converted(model))
def test_peft_from_pretrained_unexpected_keys_warning(self):
"""
Test for warning when loading a PEFT checkpoint with unexpected keys.
"""
from peft import LoraConfig
logger = logging.get_logger("transformers.integrations.peft")
for model_id, peft_model_id in zip(self.transformers_test_model_ids, self.peft_test_model_ids):
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig()
state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin")
check_torch_load_is_safe()
dummy_state_dict = torch.load(state_dict_path, weights_only=True)
# add unexpected key
dummy_state_dict["foobar"] = next(iter(dummy_state_dict.values()))
with CaptureLogger(logger) as cl:
model.load_adapter(
adapter_state_dict=dummy_state_dict, peft_config=peft_config, low_cpu_mem_usage=False
)
msg = "Loading adapter weights from state_dict led to unexpected keys not found in the model: foobar"
self.assertIn(msg, cl.out)
def test_peft_from_pretrained_missing_keys_warning(self):
"""
Test for warning when loading a PEFT checkpoint with missing keys.
"""
from peft import LoraConfig
logger = logging.get_logger("transformers.integrations.peft")
for model_id, peft_model_id in zip(self.transformers_test_model_ids, self.peft_test_model_ids):
for transformers_class in self.transformers_test_model_classes:
model = transformers_class.from_pretrained(model_id).to(torch_device)
peft_config = LoraConfig()
state_dict_path = hf_hub_download(peft_model_id, "adapter_model.bin")
check_torch_load_is_safe()
dummy_state_dict = torch.load(state_dict_path, weights_only=True)
# remove a key so that we have missing keys
key = next(iter(dummy_state_dict.keys()))
del dummy_state_dict[key]
with CaptureLogger(logger) as cl:
model.load_adapter(
adapter_state_dict=dummy_state_dict,
peft_config=peft_config,
low_cpu_mem_usage=False,
adapter_name="other",
)
# Here we need to adjust the key name a bit to account for PEFT-specific naming.
# 1. Remove PEFT-specific prefix
# If merged after dropping Python 3.8, we can use: key = key.removeprefix(peft_prefix)
peft_prefix = "base_model.model."
key = key[len(peft_prefix) :]
# 2. Insert adapter name
prefix, _, suffix = key.rpartition(".")
key = f"{prefix}.other.{suffix}"
msg = f"Loading adapter weights from state_dict led to missing keys in the model: {key}"
self.assertIn(msg, cl.out)
def test_peft_load_adapter_training_inference_mode_true(self):
"""
By default, when loading an adapter, the whole model should be in eval mode and no parameter should have
requires_grad=False.
"""
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id).to(torch_device)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname)
model = transformers_class.from_pretrained(peft_model.config._name_or_path)
model.load_adapter(tmpdirname)
assert not any(p.requires_grad for p in model.parameters())
assert not any(m.training for m in model.modules())
del model
def test_peft_load_adapter_training_inference_mode_false(self):
"""
When passing is_trainable=True, the LoRA modules should be in training mode and their parameters should have
requires_grad=True.
"""
for model_id in self.peft_test_model_ids:
for transformers_class in self.transformers_test_model_classes:
peft_model = transformers_class.from_pretrained(model_id).to(torch_device)
with tempfile.TemporaryDirectory() as tmpdirname:
peft_model.save_pretrained(tmpdirname)
model = transformers_class.from_pretrained(peft_model.config._name_or_path)
model.load_adapter(tmpdirname, is_trainable=True)
for name, module in model.named_modules():
if len(list(module.children())):
# only check leaf modules
continue
if "lora_" in name:
assert module.training
assert all(p.requires_grad for p in module.parameters())
else:
assert not module.training
assert all(not p.requires_grad for p in module.parameters())
def test_prefix_tuning_trainer_load_best_model_at_end_error(self):
# Original issue: https://github.com/huggingface/peft/issues/2256
# There is a potential error when using load_best_model_at_end=True with a prompt learning PEFT method. This is
# because Trainer uses load_adapter under the hood but with some prompt learning methods, there is an
# optimization on the saved model to remove parameters that are not required for inference, which in turn
# requires a change to the model architecture. This is why load_adapter will fail in such cases and users should
# instead set load_best_model_at_end=False and use PeftModel.from_pretrained. As this is not obvious, we now
# intercept the error and add a helpful error message.
# This test checks this error message. It also tests the "happy path" (i.e. no error) when using LoRA.
from peft import LoraConfig, PrefixTuningConfig, TaskType, get_peft_model
# create a small sequence classification dataset (binary classification)
dataset = []
for i, row in enumerate(os.__doc__.splitlines()):
dataset.append({"text": row, "label": i % 2})
ds_train = Dataset.from_list(dataset)
ds_valid = ds_train
datasets = DatasetDict(
{
"train": ds_train,
"val": ds_valid,
}
)
# tokenizer for peft-internal-testing/tiny-OPTForCausalLM-lora cannot be loaded, thus using
# hf-internal-testing/tiny-random-OPTForCausalLM
model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left", model_type="opt")
def tokenize_function(examples):
return tokenizer(examples["text"], max_length=128, truncation=True, padding="max_length")
tokenized_datasets = datasets.map(tokenize_function, batched=True)
# lora works, prefix-tuning is expected to raise an error
peft_configs = {
"lora": LoraConfig(task_type=TaskType.SEQ_CLS),
"prefix-tuning": PrefixTuningConfig(
task_type=TaskType.SEQ_CLS,
inference_mode=False,
prefix_projection=True,
num_virtual_tokens=10,
),
}
for peft_type, peft_config in peft_configs.items():
base_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
base_model.config.pad_token_id = tokenizer.pad_token_id
peft_model = get_peft_model(base_model, peft_config)
with tempfile.TemporaryDirectory() as tmpdirname:
training_args = TrainingArguments(
output_dir=tmpdirname,
num_train_epochs=3,
eval_strategy="epoch",
save_strategy="epoch",
load_best_model_at_end=True,
)
trainer = Trainer(
model=peft_model,
args=training_args,
train_dataset=tokenized_datasets["train"],
eval_dataset=tokenized_datasets["val"],
)
if peft_type == "lora":
# LoRA works with load_best_model_at_end
trainer.train()
else:
# prefix tuning does not work, but at least users should get a helpful error message
msg = "When using prompt learning PEFT methods such as PREFIX_TUNING"
with self.assertRaisesRegex(RuntimeError, msg):
trainer.train()
def test_peft_pipeline_no_warning(self):
"""
Test to verify that the warning message "The model 'PeftModel' is not supported for text-generation"
does not appear when using PeftModel with text-generation pipeline.
"""
from peft import PeftModel
from transformers import pipeline
ADAPTER_PATH = "peft-internal-testing/tiny-OPTForCausalLM-lora"
BASE_PATH = "hf-internal-testing/tiny-random-OPTForCausalLM"
# Input text for testing
text = "Who is a Elon Musk?"
model = AutoModelForCausalLM.from_pretrained(
BASE_PATH,
device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(BASE_PATH)
lora_model = PeftModel.from_pretrained(
model,
ADAPTER_PATH,
device_map="auto",
)
# Create pipeline with PEFT model while capturing log output
# Check that the warning message is not present in the logs
pipeline_logger = logging.get_logger("transformers.pipelines.base")
with self.assertNoLogs(pipeline_logger, logging.ERROR):
lora_generator = pipeline(
task="text-generation",
model=lora_model,
tokenizer=tokenizer,
max_length=10,
)
# Generate text to verify pipeline works
_ = lora_generator(text)