From 0f41c41a463ddd447e4248f4b5f9eb6728e8191d Mon Sep 17 00:00:00 2001
From: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Date: Tue, 3 Jun 2025 17:58:31 +0200
Subject: [PATCH] Fix hqq issue (#38551)

* bc

* style
---
 src/transformers/quantizers/quantizer_hqq.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/transformers/quantizers/quantizer_hqq.py b/src/transformers/quantizers/quantizer_hqq.py
index 38d8a15cbfc..07dda2588cb 100755
--- a/src/transformers/quantizers/quantizer_hqq.py
+++ b/src/transformers/quantizers/quantizer_hqq.py
@@ -135,7 +135,11 @@ class HqqHfQuantizer(HfQuantizer):
 
             # Append new expected layers based on _ref_keys
             _ref_keys = HQQLinear(
-                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
+                linear_layer=None,
+                quant_config=None,
+                compute_dtype=torch.float16,
+                device="cpu",
+                del_orig=False,
             ).state_dict_keys() - {"bias"}
 
             # Clean-up
@@ -224,6 +228,7 @@ class HqqHfQuantizer(HfQuantizer):
                     quant_config=None,
                     compute_dtype=self.torch_dtype,
                     device=target_device,
+                    del_orig=False,
                 )
 
             hqq_layer.load_state_dict(module_state_dict)