mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 04:40:06 +06:00
Fix hqq issue (#38551)
Some checks are pending
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Waiting to run
Build documentation / build (push) Waiting to run
New model PR merged notification / Notify new model (push) Waiting to run
Slow tests on important models (on Push - A10) / Get all modified files (push) Waiting to run
Slow tests on important models (on Push - A10) / Slow & FA2 tests (push) Blocked by required conditions
Self-hosted runner (push-caller) / Check if setup was changed (push) Waiting to run
Self-hosted runner (push-caller) / build-docker-containers (push) Blocked by required conditions
Self-hosted runner (push-caller) / Trigger Push CI (push) Blocked by required conditions
Secret Leaks / trufflehog (push) Waiting to run
Update Transformers metadata / build_and_package (push) Waiting to run
Some checks are pending
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Waiting to run
Build documentation / build (push) Waiting to run
New model PR merged notification / Notify new model (push) Waiting to run
Slow tests on important models (on Push - A10) / Get all modified files (push) Waiting to run
Slow tests on important models (on Push - A10) / Slow & FA2 tests (push) Blocked by required conditions
Self-hosted runner (push-caller) / Check if setup was changed (push) Waiting to run
Self-hosted runner (push-caller) / build-docker-containers (push) Blocked by required conditions
Self-hosted runner (push-caller) / Trigger Push CI (push) Blocked by required conditions
Secret Leaks / trufflehog (push) Waiting to run
Update Transformers metadata / build_and_package (push) Waiting to run
* bc
* style
This commit is contained in:
parent
279000bb70
commit
0f41c41a46
@@ -135,7 +135,11 @@ class HqqHfQuantizer(HfQuantizer):
|
||||
|
||||
# Append new expected layers based on _ref_keys
|
||||
_ref_keys = HQQLinear(
|
||||
linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
|
||||
linear_layer=None,
|
||||
quant_config=None,
|
||||
compute_dtype=torch.float16,
|
||||
device="cpu",
|
||||
del_orig=False,
|
||||
).state_dict_keys() - {"bias"}
|
||||
|
||||
# Clean-up
|
||||
@@ -224,6 +228,7 @@ class HqqHfQuantizer(HfQuantizer):
|
||||
quant_config=None,
|
||||
compute_dtype=self.torch_dtype,
|
||||
device=target_device,
|
||||
del_orig=False,
|
||||
)
|
||||
|
||||
hqq_layer.load_state_dict(module_state_dict)
|
||||
|
Loading…
Reference in New Issue
Block a user