fix hqq due to recent modeling changes (#36771)

* fix-hqq

* style

* test
Author: Marc Sun (committed by GitHub)
Date:   2025-03-18 12:20:27 +01:00
Parent: e959530b8f
Commit: 3017536ebf
2 changed files with 32 additions and 1 deletion


@@ -169,7 +169,12 @@ class HqqHfQuantizer(HfQuantizer):
                 and tensor_name != "bias"
             )
         else:
-            return isinstance(module, torch.nn.Linear) and tensor_name == "weight"
+            # we need a special path for bias since hqq overwrote load_state_dict for this layer
+            return (
+                isinstance(module, torch.nn.Linear)
+                and tensor_name == "weight"
+                or (isinstance(module, HQQLinear) and tensor_name == "bias")
+            )

     def create_quantized_param(
         self,
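
For context, a rough standalone sketch of the routing rule this hunk introduces: plain nn.Linear weights still go through quantization, and the bias of an already-converted HQQLinear is claimed as well, since hqq overrides load_state_dict on that layer. The function name and the import guard below are illustrative, not part of the patch.

import torch

try:
    from hqq.core.quantize import HQQLinear
except ImportError:  # hqq is an optional dependency
    HQQLinear = None

def should_handle_param(module, tensor_name):
    # hypothetical standalone version of the predicate added above
    if isinstance(module, torch.nn.Linear) and tensor_name == "weight":
        return True
    # bias of a layer that hqq already converted needs the special path too
    return HQQLinear is not None and isinstance(module, HQQLinear) and tensor_name == "bias"
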
@@ -194,6 +199,10 @@ class HqqHfQuantizer(HfQuantizer):
         parent_module = find_parent(model, layer_name)
         node = layer_name.split(".")[-1]

+        if tensor_name == "bias":
+            # this should already be set
+            return
+
         # set module state_dict
         module_state_dict = {}
         for k, v in state_dict.items():
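
As a quick end-to-end sketch of the user-facing path these two hunks fix: loading a bias-carrying model such as OPT with an HQQ config should now resolve the bias tensors without tripping the quantizer. The settings below mirror the new test; the exact loading kwargs are otherwise an assumption.

import torch
from transformers import AutoModelForCausalLM, HqqConfig

# 8-bit HQQ config, same settings as the new HQQTestBias test
quant_config = HqqConfig(nbits=8, group_size=64)

# facebook/opt-125m uses nn.Linear layers with biases, so loading it
# exercises the HQQLinear bias branch added above
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
    device_map="cuda",
)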


@@ -145,6 +145,28 @@ class HQQTestMultiGPU(unittest.TestCase):
         check_forward(self, hqq_runner.model)


+@slow
+@require_torch_gpu
+@require_accelerate
+@require_hqq
+class HQQTestBias(unittest.TestCase):
+    def tearDown(self):
+        cleanup()
+
+    def test_fp16_quantized_model(self):
+        """
+        Simple LLM model testing fp16 with bias
+        """
+        quant_config = HqqConfig(nbits=8, group_size=64)
+
+        hqq_runner = HQQLLMRunner(
+            model_id="facebook/opt-125m", quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
+        )
+
+        check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
+        check_forward(self, hqq_runner.model)
+
+
 @slow
 @require_torch_gpu
 @require_accelerate
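
To exercise the new test class locally, something along these lines should work; the test-file path is assumed from the usual layout of the HQQ tests, and RUN_SLOW is needed because the class is marked @slow:

RUN_SLOW=1 python -m pytest -v tests/quantization/hqq/test_hqq.py -k HQQTestBias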