update bnb ground truth (#39117)

* update bnb results Signed-off-by: jiqing-feng <jiqing.feng@intel.com> * set seed to avoid sampling different results Signed-off-by: jiqing-feng <jiqing.feng@intel.com> * fix int8 tests Signed-off-by: jiqing-feng <jiqing.feng@intel.com> * fix typo Signed-off-by: jiqing-feng <jiqing.feng@intel.com> * add comments Signed-off-by: jiqing-feng <jiqing.feng@intel.com> --------- Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
2025-07-03 12:50:06 +06:00 · 2025-07-02 02:06:37 +08:00 · 2025-07-02 02:06:37 +08:00 · db2f535443
commit db2f535443
parent 260846efad
2 changed files with 10 additions and 0 deletions
--- a/tests/quantization/bnb/test_4bit.py
+++ b/tests/quantization/bnb/test_4bit.py
@@ -27,6 +27,7 @@ from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
+    set_seed,
 )
 from transformers.models.opt.modeling_opt import OPTAttention
 from transformers.testing_utils import (
@@ -111,6 +112,8 @@ class Base4bitTest(unittest.TestCase):
    EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University")
    EXPECTED_OUTPUTS.add("Hello my name is John and I am 25 years old.")
    EXPECTED_OUTPUTS.add("Hello my name is John and I am a student at the University of")
+    # Expected values on Intel XPU and NV A100
+    EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
    MAX_NEW_TOKENS = 10

    def setUp(self):
@@ -513,6 +516,8 @@ class Pipeline4BitTest(Base4bitTest):
            max_new_tokens=self.MAX_NEW_TOKENS,
        )

+        # Avoid sampling different outputs
+        set_seed(42)
        # Real second forward pass
        pipeline_output = self.pipe(self.input_text)
        self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -27,6 +27,7 @@ from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
+    set_seed,
 )
 from transformers.models.opt.modeling_opt import OPTAttention
 from transformers.testing_utils import (
@@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase):
    MAX_NEW_TOKENS = 10
    # Expected values with offload
    EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in")
+    # Expected values on Intel XPU and NV A100
+    EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")

    def setUp(self):
        # Models and tokenizer
@@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test):
            max_new_tokens=self.MAX_NEW_TOKENS,
        )

+        # Avoid sampling different outputs
+        set_seed(42)
        # Real second forward pass
        pipeline_output = self.pipe(self.input_text)
        self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)