From de11d0bdf0286f64616ea0d4b5778c41151a2d22 Mon Sep 17 00:00:00 2001
From: miRx923 <94078303+miRx923@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:04:50 +0200
Subject: [PATCH] Update quantizer_bnb_4bit.py: In the ValueError string there
 should be "....you need to set `llm_int8_enable_fp32_cpu_offload=True`...."
 instead of "`load_in_8bit_fp32_cpu_offload=True`". (#30013)

* Update quantizer_bnb_4bit.py

There is a mistake in the ValueError message on line 86 of quantizer_bnb_4bit.py. The error string should say "...you need to set `llm_int8_enable_fp32_cpu_offload=True`..." instead of "...you need to set `load_in_8bit_fp32_cpu_offload=True`...". I think the BitsAndBytesConfig() arguments were updated, but the ValueError message in quantizer_bnb_4bit.py was not changed to match.

* Update quantizer_bnb_4bit.py

Changed the ValueError string from "...you need to set `load_in_8bit_fp32_cpu_offload=True`..." to "...you need to set `llm_int8_enable_fp32_cpu_offload=True`...".
---
 src/transformers/quantizers/quantizer_bnb_4bit.py | 2 +-
 src/transformers/quantizers/quantizer_bnb_8bit.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/quantizers/quantizer_bnb_4bit.py b/src/transformers/quantizers/quantizer_bnb_4bit.py
index b98eebba183..112cfd644f1 100644
--- a/src/transformers/quantizers/quantizer_bnb_4bit.py
+++ b/src/transformers/quantizers/quantizer_bnb_4bit.py
@@ -87,7 +87,7 @@ class Bnb4BitHfQuantizer(HfQuantizer):
                     """
                     Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
                     quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to
+                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
                     `from_pretrained`. Check
                     https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                     for more details.
diff --git a/src/transformers/quantizers/quantizer_bnb_8bit.py b/src/transformers/quantizers/quantizer_bnb_8bit.py
index f4249b69d09..8ad60a03e23 100644
--- a/src/transformers/quantizers/quantizer_bnb_8bit.py
+++ b/src/transformers/quantizers/quantizer_bnb_8bit.py
@@ -87,7 +87,7 @@ class Bnb8BitHfQuantizer(HfQuantizer):
                     """
                     Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
                     quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to
+                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
                     `from_pretrained`. Check
                     https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                     for more details.