From 0f41c41a463ddd447e4248f4b5f9eb6728e8191d Mon Sep 17 00:00:00 2001
From: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Date: Tue, 3 Jun 2025 17:58:31 +0200
Subject: [PATCH] Fix hqq issue (#38551)

* bc

* style
---
 src/transformers/quantizers/quantizer_hqq.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/transformers/quantizers/quantizer_hqq.py b/src/transformers/quantizers/quantizer_hqq.py
index 38d8a15cbfc..07dda2588cb 100755
--- a/src/transformers/quantizers/quantizer_hqq.py
+++ b/src/transformers/quantizers/quantizer_hqq.py
@@ -135,7 +135,11 @@ class HqqHfQuantizer(HfQuantizer):
 
             # Append new expected layers based on _ref_keys
             _ref_keys = HQQLinear(
-                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
+                linear_layer=None,
+                quant_config=None,
+                compute_dtype=torch.float16,
+                device="cpu",
+                del_orig=False,
             ).state_dict_keys() - {"bias"}
 
             # Clean-up
@@ -224,6 +228,7 @@ class HqqHfQuantizer(HfQuantizer):
                     quant_config=None,
                     compute_dtype=self.torch_dtype,
                     device=target_device,
+                    del_orig=False,
                 )
 
             hqq_layer.load_state_dict(module_state_dict)