diff --git a/src/transformers/quantizers/quantizer_hqq.py b/src/transformers/quantizers/quantizer_hqq.py
index 93ab958a30c..60d334fdd9b 100755
--- a/src/transformers/quantizers/quantizer_hqq.py
+++ b/src/transformers/quantizers/quantizer_hqq.py
@@ -169,7 +169,12 @@ class HqqHfQuantizer(HfQuantizer):
                 and tensor_name != "bias"
             )
         else:
-            return isinstance(module, torch.nn.Linear) and tensor_name == "weight"
+            # we need a special path for bias since hqq overwrote load_state_dict for this layer
+            return (
+                isinstance(module, torch.nn.Linear)
+                and tensor_name == "weight"
+                or (isinstance(module, HQQLinear) and tensor_name == "bias")
+            )
 
     def create_quantized_param(
         self,
@@ -194,6 +199,10 @@ class HqqHfQuantizer(HfQuantizer):
         parent_module = find_parent(model, layer_name)
         node = layer_name.split(".")[-1]
 
+        if tensor_name == "bias":
+            # this should already be set
+            return
+
         # set module state_dict
         module_state_dict = {}
         for k, v in state_dict.items():
diff --git a/tests/quantization/hqq/test_hqq.py b/tests/quantization/hqq/test_hqq.py
index c25aada6ed4..7335a937086 100755
--- a/tests/quantization/hqq/test_hqq.py
+++ b/tests/quantization/hqq/test_hqq.py
@@ -145,6 +145,28 @@ class HQQTestMultiGPU(unittest.TestCase):
         check_forward(self, hqq_runner.model)
 
 
+@slow
+@require_torch_gpu
+@require_accelerate
+@require_hqq
+class HQQTestBias(unittest.TestCase):
+    def tearDown(self):
+        cleanup()
+
+    def test_fp16_quantized_model(self):
+        """
+        Simple LLM model testing fp16 with bias
+        """
+        quant_config = HqqConfig(nbits=8, group_size=64)
+
+        hqq_runner = HQQLLMRunner(
+            model_id="facebook/opt-125m", quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
+        )
+
+        check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
+        check_forward(self, hqq_runner.model)
+
+
 @slow
 @require_torch_gpu
 @require_accelerate
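
For reference, a minimal usage sketch of the path this diff touches (not part of the patch itself; it assumes the standard transformers loading API and uses facebook/opt-125m, whose Linear layers carry a bias, with the same HqqConfig as the new HQQTestBias test):

# Minimal sketch (assumption: standard `transformers` loading API; not taken from this PR).
# Quantizing a bias-carrying model such as facebook/opt-125m routes the bias tensors
# through the HqqHfQuantizer changes shown above.
import torch
from transformers import AutoModelForCausalLM, HqqConfig

quant_config = HqqConfig(nbits=8, group_size=64)

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",
    torch_dtype=torch.float16,
    device_map="cuda",
    quantization_config=quant_config,
)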