Fix Failing GPTQ tests (#36666)

fix tests
This commit is contained in:
Mohamed Mekkouri 2025-03-12 20:03:02 +01:00 committed by GitHub
parent c7eb95581a
commit 0013ba61e5

@@ -94,6 +94,7 @@ class GPTQTest(unittest.TestCase):
EXPECTED_OUTPUTS.add("Hello my name is Aiden, I am a student at the University")
EXPECTED_OUTPUTS.add("Hello my name is Nate and I am a member of the N")
EXPECTED_OUTPUTS.add("Hello my name is Nellie and I am a student at the")
EXPECTED_OUTPUTS.add("Hello my name is Nate and I am a new member of the")
# this seems a little small considering that we are doing 4bit quant but we have a small model and we don't quantize the embeddings
EXPECTED_RELATIVE_DIFFERENCE = 1.664253062
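For context, `EXPECTED_RELATIVE_DIFFERENCE` is the fp16-to-quantized memory ratio the suite asserts. A minimal sketch of what such a check typically looks like, assuming the fp16 footprint is cached in `self.mem_fp16` (an assumed attribute name; `get_memory_footprint()` is the real `PreTrainedModel` method):

```python
# Hedged sketch of the footprint check; `mem_fp16` is an assumed attribute,
# get_memory_footprint() is the real PreTrainedModel API.
def test_memory_footprint(self):
    mem_quantized = self.quantized_model.get_memory_footprint()
    # ~1.66x is modest for 4-bit quant, since the model is small and the
    # embeddings are left unquantized (see the comment above)
    self.assertAlmostEqual(self.mem_fp16 / mem_quantized, self.EXPECTED_RELATIVE_DIFFERENCE)
```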
@@ -260,7 +261,9 @@ class GPTQTest(unittest.TestCase):
if self.device_map == "cpu":
quant_type = "ipex" if is_ipex_available() else "torch"
else:
quant_type = "exllama"
# We expect tritonv2 to be used here, because the exllama backend doesn't support packing: https://github.com/ModelCloud/GPTQModel/issues/1354
# TODO: Remove this once the GPTQModel exllama kernels support packing
quant_type = "tritonv2"
quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
tmpdirname, device_map=self.device_map
)
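The reload path therefore picks its kernel backend by device. A condensed sketch of that selection logic, assuming only what the hunk above shows (`is_ipex_available` is the real transformers utility; the GPU fallback mirrors the comment in the diff):

```python
from transformers.utils import is_ipex_available

def expected_quant_type(device_map: str) -> str:
    """Sketch of the backend the test expects after save/reload."""
    if device_map == "cpu":
        # CPU inference goes through Intel Extension for PyTorch when
        # available, otherwise the plain torch kernels
        return "ipex" if is_ipex_available() else "torch"
    # On GPU, GPTQModel falls back to tritonv2 because the exllama
    # kernels cannot repack weights (GPTQModel#1354)
    return "tritonv2"
```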
@@ -424,9 +427,17 @@ class GPTQTestExllamaV2(unittest.TestCase):
cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_name, use_fast=True)
def test_quantized_layers_type(self):
if is_auto_gptq_available() and not is_gptqmodel_available():
self.assertEqual(
self.quantized_model.model.layers[0].self_attn.k_proj.QUANT_TYPE,
"exllama" if is_gptqmodel_available() else "exllamav2",
"exllamav2",
)
else:
# We expect tritonv2 to be used here, because the exllama backend doesn't support packing: https://github.com/ModelCloud/GPTQModel/issues/1354
# TODO: Remove this once the GPTQModel exllama kernels support packing
self.assertEqual(
self.quantized_model.model.layers[0].self_attn.k_proj.QUANT_TYPE,
"tritonv2",
)
def check_inference_correctness(self, model):
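For anyone reproducing the assertion outside the suite, a hedged sketch of inspecting which kernel a quantized layer received; the checkpoint name is a placeholder, and `QUANT_TYPE` is the attribute checked in the test above:

```python
from transformers import AutoModelForCausalLM

# placeholder checkpoint: any GPTQ-quantized Llama-style causal LM
model = AutoModelForCausalLM.from_pretrained("some-org/some-gptq-model", device_map="auto")
# auto-gptq layers report "exllamav2" here; GPTQModel reports "tritonv2"
print(model.model.layers[0].self_attn.k_proj.QUANT_TYPE)
```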