restrict cache allocator to non quantized model (#36428)

Marc Sun 2025-02-26 22:16:15 +01:00 committed by GitHub
parent a7fbab33ae
commit 8ede897c30

src/transformers/modeling_utils.py

@@ -4839,7 +4839,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
         model.expected_keys = expected_keys
         if device_map is not None:
             expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix)
-            caching_allocator_warmup(model, expanded_device_map, dtype)
+            if hf_quantizer is None:
+                caching_allocator_warmup(model_to_load, expanded_device_map, dtype)
 
         if device_map is not None and is_safetensors:
             param_device_map = expanded_device_map
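
For context, the guarded call pre-touches accelerator memory before checkpoint weights are copied in: by requesting one large block per target device up front, torch's caching allocator reserves the memory in a single allocation instead of many small ones during loading. The sketch below is a minimal illustration of that idea, not the library's actual caching_allocator_warmup; the helper name and the fixed per-parameter size are assumptions. It also hints at why the warmup is now skipped when hf_quantizer is set: sizing the reservation as element count times dtype item size presumably over-reserves memory for quantized checkpoints, whose on-device weights occupy fewer bytes than the full-precision dtype suggests.

    import torch

    def warmup_caching_allocator(expanded_device_map, dtype):
        # Hypothetical stand-in for the warmup: sum up how many bytes each
        # device is expected to receive, then allocate (and immediately drop)
        # one big tensor per device so the caching allocator keeps the block
        # cached and reuses it while the real weights are loaded.
        itemsize = torch.empty((), dtype=dtype).element_size()
        numel_per_param = 1_000_000  # assumption: real code derives sizes from each param's shape
        bytes_per_device = {}
        for _param_name, device in expanded_device_map.items():
            if device in ("cpu", "disk"):
                continue  # warmup only helps accelerator memory allocators
            bytes_per_device[device] = bytes_per_device.get(device, 0) + numel_per_param * itemsize
        for device, byte_count in bytes_per_device.items():
            # The tensor goes out of scope right away; the reserved block
            # stays in the allocator's cache for the upcoming weight copies.
            torch.empty(byte_count // itemsize, dtype=dtype, device=device)

    if torch.cuda.is_available():
        device_map = {"transformer.h.0.weight": "cuda:0", "lm_head.weight": "cuda:0"}
        # Mirroring the patch above, a caller would skip this when a
        # quantizer (hf_quantizer) is attached to the model.
        warmup_caching_allocator(device_map, torch.float16)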