mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 10:12:23 +06:00)
restrict cache allocator to non quantized model (#36428)
This commit is contained in:
parent a7fbab33ae
commit 8ede897c30
@@ -4839,7 +4839,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin):
         model.expected_keys = expected_keys
         if device_map is not None:
             expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix)
-            caching_allocator_warmup(model, expanded_device_map, dtype)
+            if hf_quantizer is None:
+                caching_allocator_warmup(model_to_load, expanded_device_map, dtype)

         if device_map is not None and is_safetensors:
             param_device_map = expanded_device_map
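For context, here is a minimal sketch of what a caching-allocator warmup of this kind does. This is not the transformers implementation; the function body, the cpu/disk skip, and the byte accounting are illustrative assumptions, with only the names from the diff (caching_allocator_warmup, expanded_device_map, dtype) carried over.

# Hypothetical sketch, NOT the real transformers helper: pre-allocate one
# large tensor per accelerator so the CUDA caching allocator reserves the
# memory up front, instead of issuing many small cudaMalloc calls while
# checkpoint weights are copied in.
from collections import defaultdict

import torch
import torch.nn as nn


def caching_allocator_warmup_sketch(
    model: nn.Module, expanded_device_map: dict[str, str], dtype: torch.dtype
) -> None:
    # Sum the parameter bytes destined for each accelerator device.
    bytes_per_device: dict[str, int] = defaultdict(int)
    params = dict(model.named_parameters())
    for param_name, device in expanded_device_map.items():
        # cpu/disk offload never touches the CUDA allocator; skip unknown names.
        if device in ("cpu", "disk") or param_name not in params:
            continue
        bytes_per_device[device] += params[param_name].numel() * dtype.itemsize

    for device, num_bytes in bytes_per_device.items():
        # Allocate and immediately drop the reference: the freed block stays
        # in the allocator's cache and is reused during the real weight load.
        _ = torch.empty(num_bytes // dtype.itemsize, dtype=dtype, device=device)

The `if hf_quantizer is None:` guard added by the diff restricts this warmup to non-quantized models, presumably because quantized weights are stored at a different width than the checkpoint `dtype` (e.g. 4-bit packed tensors), so a warmup sized from `numel * dtype.itemsize` would reserve far more memory than loading actually needs.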