From 8ede897c30372c7c01a2ce12323eb08295543b28 Mon Sep 17 00:00:00 2001 From: Marc Sun <57196510+SunMarc@users.noreply.github.com> Date: Wed, 26 Feb 2025 22:16:15 +0100 Subject: [PATCH] restrict caching allocator warmup to non-quantized models (#36428) --- src/transformers/modeling_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index e8434e8e9ed..db318156aca 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -4839,7 +4839,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix model.expected_keys = expected_keys if device_map is not None: expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix) - caching_allocator_warmup(model, expanded_device_map, dtype) + if hf_quantizer is None: + caching_allocator_warmup(model_to_load, expanded_device_map, dtype) if device_map is not None and is_safetensors: param_device_map = expanded_device_map