mirror of https://github.com/huggingface/transformers.git
parent dfe9a31973
commit edea08a6b0
@@ -2578,8 +2578,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
                 raise ValueError(
                     """
                     Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
-                    the quantized model. If you have set a value for `max_memory` you should increase that. To have
-                    an idea of the modules that are set on the CPU or RAM you can print model.hf_device_map.
+                    the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
+                    these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
+                    `device_map` to `from_pretrained`. Check
+                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
+                    for more details.
                     """
                 )
             del device_map_without_lm_head
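Since the new error text tells users to combine `load_in_8bit_fp32_cpu_offload=True` with a custom `device_map` passed to `from_pretrained`, here is a minimal sketch of that call. The checkpoint name and the device-map entries are illustrative assumptions, not part of this commit; only the two keyword arguments come from the error message itself.

# A minimal sketch of the fix the new error message suggests.
from transformers import AutoModelForCausalLM

# Hypothetical device map: keep the bulk of the model on GPU 0 and
# explicitly offload one module to the CPU, where it stays in 32-bit.
device_map = {
    "transformer.word_embeddings": 0,  # example module names, not from the diff
    "transformer.h": 0,
    "transformer.ln_f": 0,
    "lm_head": "cpu",
}

model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-1b7",  # example checkpoint
    device_map=device_map,
    load_in_8bit=True,
    load_in_8bit_fp32_cpu_offload=True,  # per the error message in this diff
)

Without the flag, `from_pretrained` raises the ValueError shown above as soon as any module of an 8-bit model lands on the CPU or disk; with it, those offloaded modules are kept in fp32 instead of being quantized.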