From cde4ac72247a73ea95d775dd94a0e9bce23edbe6 Mon Sep 17 00:00:00 2001 From: shanjiaz Date: Wed, 25 Jun 2025 12:50:09 -0400 Subject: [PATCH 1/2] disable gradient calculation for int weights Signed-off-by: shanjiaz --- .../quantizers/quantizer_compressed_tensors.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/transformers/quantizers/quantizer_compressed_tensors.py b/src/transformers/quantizers/quantizer_compressed_tensors.py index 7df4fb7ca90..e0fc1b9c535 100644 --- a/src/transformers/quantizers/quantizer_compressed_tensors.py +++ b/src/transformers/quantizers/quantizer_compressed_tensors.py @@ -124,6 +124,21 @@ class CompressedTensorsHfQuantizer(HfQuantizer): elif not self.quantization_config.is_quantization_compressed: apply_quantization_config(model, ct_quantization_config) + # Identify quantized modules with integer weights/activations + quant_targets = set() + if ct_quantization_config: + for group in ct_quantization_config.config_groups.values(): + if group.weights.type.startswith("int") or ( + group.input_activations and group.input_activations.type.startswith("int") + ): + quant_targets.update(group.targets) + + # Disable gradient computation for quantized int modules + for _, module in model.named_modules(): + if type(module).__name__ in quant_targets: + for param in module.parameters(): + param.requires_grad = False + def _process_model_after_weight_loading(self, model, **kwargs): """Decompress loaded model if necessary - need for qat""" From 8d9de7af1551d226f300825f47451c3e309eb253 Mon Sep 17 00:00:00 2001 From: shanjiaz <43143795+shanjiaz@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:37:25 -0400 Subject: [PATCH 2/2] Update src/transformers/quantizers/quantizer_compressed_tensors.py Co-authored-by: Kyle Sayers --- src/transformers/quantizers/quantizer_compressed_tensors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/quantizers/quantizer_compressed_tensors.py b/src/transformers/quantizers/quantizer_compressed_tensors.py index e0fc1b9c535..b99aea4ba60 100644 --- a/src/transformers/quantizers/quantizer_compressed_tensors.py +++ b/src/transformers/quantizers/quantizer_compressed_tensors.py @@ -134,7 +134,7 @@ class CompressedTensorsHfQuantizer(HfQuantizer): quant_targets.update(group.targets) # Disable gradient computation for quantized int modules - for _, module in model.named_modules(): + for module in model.modules(): if type(module).__name__ in quant_targets: for param in module.parameters(): param.requires_grad = False