Mirror of https://github.com/huggingface/transformers.git
Fix the bitsandbytes error formatting ("Some modules are dispatched on ...") (#30494)

Fix the formatting of the `bitsandbytes` error raised when some modules are not properly offloaded: the message is now emitted as a single clean line instead of an indented multi-line block.
commit 59e715f71c
parent 19cfdf0fac
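Why the change helps: a triple-quoted string written inside an indented block embeds the source file's newlines and leading indentation into the message itself, so the rendered error comes out broken across lines and padded with spaces. Adjacent string literals, by contrast, are concatenated by the Python parser into one clean line. A minimal sketch of the difference (illustrative only, not part of the commit):

# Triple-quoted: the message carries source indentation and newlines.
broken = """
    Some modules are dispatched on the CPU or the disk.
    """

# Implicit concatenation: one clean single-line message.
clean = (
    "Some modules are dispatched on the CPU or the disk. "
    "Make sure you have enough GPU RAM to fit the quantized model."
)

print(repr(broken))  # '\n    Some modules are dispatched on the CPU or the disk.\n    '
print(repr(clean))   # 'Some modules are dispatched on the CPU or the disk. Make sure ...'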
@@ -84,14 +84,12 @@ class Bnb4BitHfQuantizer(HfQuantizer):
             }
             if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
                 raise ValueError(
-                    """
-                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
-                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
-                    `from_pretrained`. Check
-                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
-                    for more details.
-                    """
+                    "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
+                    "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
+                    "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
+                    "`from_pretrained`. Check "
+                    "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
+                    "for more details. "
                 )

         if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.39.0"):
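For context, the escape hatch this error points at is a custom `device_map` plus the fp32-CPU-offload flag. A rough usage sketch, assuming `BitsAndBytesConfig`'s `llm_int8_enable_fp32_cpu_offload` flag and an illustrative checkpoint and device map (none of which appear in the diff itself):

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Keep offloaded modules in fp32 on the CPU instead of raising the error above.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,  # flag name as defined on BitsAndBytesConfig
)

# Illustrative split: everything on GPU 0 except the lm_head, which stays on CPU.
device_map = {
    "model.embed_tokens": 0,
    "model.layers": 0,
    "model.norm": 0,
    "lm_head": "cpu",
}

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",  # placeholder checkpoint
    quantization_config=quant_config,
    device_map=device_map,
)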
@@ -84,14 +84,12 @@ class Bnb8BitHfQuantizer(HfQuantizer):
             }
             if "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
                 raise ValueError(
-                    """
-                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
-                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
-                    `from_pretrained`. Check
-                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
-                    for more details.
-                    """
+                    "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
+                    "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
+                    "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
+                    "`from_pretrained`. Check "
+                    "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
+                    "for more details. "
                 )

         if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.37.2"):
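Both hunks end just above the same pattern: gating on the installed `bitsandbytes` version (0.39.0 for the 4-bit quantizer, 0.37.2 for the 8-bit one). A small self-contained sketch of that check (the helper name is mine, not from the source):

import importlib.metadata

from packaging import version


def bnb_version_at_least(minimum: str) -> bool:
    """Return True if the installed bitsandbytes meets the given minimum version."""
    return version.parse(importlib.metadata.version("bitsandbytes")) >= version.parse(minimum)


if not bnb_version_at_least("0.39.0"):
    raise ValueError("bitsandbytes >= 0.39.0 is required for this code path")  # message is illustrative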