Fix hqq issue (#38551)

* bc
* style
2025-07-03 04:40:06 +06:00 · 2025-06-03 17:58:31 +02:00 · 2025-06-03 17:58:31 +02:00 · 0f41c41a46
commit 0f41c41a46
parent 279000bb70
1 changed file with 6 additions and 1 deletion
--- a/src/transformers/quantizers/quantizer_hqq.py
+++ b/src/transformers/quantizers/quantizer_hqq.py
@@ -135,7 +135,11 @@ class HqqHfQuantizer(HfQuantizer):

            # Append new expected layers based on _ref_keys
            _ref_keys = HQQLinear(
-                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
+                linear_layer=None,
+                quant_config=None,
+                compute_dtype=torch.float16,
+                device="cpu",
+                del_orig=False,
            ).state_dict_keys() - {"bias"}

            # Clean-up
@@ -224,6 +228,7 @@ class HqqHfQuantizer(HfQuantizer):
                    quant_config=None,
                    compute_dtype=self.torch_dtype,
                    device=target_device,
+                    del_orig=False,
                )

            hqq_layer.load_state_dict(module_state_dict)