diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py
index c1aa57d790a..3ce50f8fec2 100644
--- a/src/transformers/modeling_gguf_pytorch_utils.py
+++ b/src/transformers/modeling_gguf_pytorch_utils.py
@@ -258,6 +258,8 @@ TENSOR_PROCESSORS = {
 
 
 def read_field(reader, field):
+    if field not in reader.fields:
+        return []
     value = reader.fields[field]
     return [_gguf_parse_value(value.parts[_data_index], value.types) for _data_index in value.data]
 
@@ -369,6 +371,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
     parsed_parameters = {k: {} for k in GGUF_TO_TRANSFORMERS_MAPPING}
 
     architecture = read_field(reader, "general.architecture")[0]
+    # NOTE: Some GGUF checkpoints may miss `general.name` field in metadata
     model_name = read_field(reader, "general.name")
 
     updated_architecture = None
diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
index d1f53b986cc..52c700b16f3 100644
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -298,6 +298,7 @@ class GgufModelTests(unittest.TestCase):
     gemma2_model_id = "bartowski/gemma-2-2b-it-GGUF"
     original_gemma3_text_model_id = "google/gemma-3-1b-it"
     original_gemma3_vision_model_id = "google/gemma-3-4b-it"
+    gemma3_qat_model_id = "google/gemma-3-1b-it-qat-q4_0-gguf"
     gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
     gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
@@ -329,7 +330,7 @@ class GgufModelTests(unittest.TestCase):
     q3_k_gemma2_model_id = "gemma-2-2b-it-Q3_K_L.gguf"
     q8_0_gemma2_model_id = "gemma-2-2b-it-Q8_0.gguf"
     fp32_gemma2_model_id = "gemma-2-2b-it-f32.gguf"
-    q2_k_gemma3_text_model_id = "gemma-3-1b-it-Q2_K.gguf"
+    q4_0_gemma3_qat_model_id = "gemma-3-1b-it-q4_0.gguf"
     bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
     bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
@@ -889,19 +890,20 @@ class GgufModelTests(unittest.TestCase):
         else:
             raise ValueError(f"Layer {layer_name} is not presented in GGUF model")
 
+    @require_read_token
     @unittest.skipUnless(is_gguf_available("0.16.0"), "test requires gguf version >= 0.16.0")
-    def test_gemma3_text_q2_k(self):
+    def test_gemma3_qat_q4_0(self):
         model = AutoModelForCausalLM.from_pretrained(
-            self.gemma3_text_model_id,
-            gguf_file=self.q2_k_gemma3_text_model_id,
+            self.gemma3_qat_model_id,
+            gguf_file=self.q4_0_gemma3_qat_model_id,
             torch_dtype=torch.float16,
         )
-        tokenizer = AutoTokenizer.from_pretrained(self.gemma3_text_model_id, gguf_file=self.q2_k_gemma3_text_model_id)
+        tokenizer = AutoTokenizer.from_pretrained(self.gemma3_qat_model_id, gguf_file=self.q4_0_gemma3_qat_model_id)
         text = tokenizer(self.example_text, return_tensors="pt")["input_ids"]
         out = model.generate(text, max_new_tokens=10)
 
-        EXPECTED_TEXT = "Hello,\n\nI'm looking for a small,"
+        EXPECTED_TEXT = 'Hello with the prompt, "What is the best way'
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
 
     @require_read_token
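
For reference, a minimal sketch of the user-facing code path this patch fixes, using the same repo id and GGUF filename as the new test above; it assumes the gated `google/gemma-3-1b-it-qat-q4_0-gguf` repo is accessible with a configured Hugging Face read token:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# gemma-3 QAT GGUF checkpoints omit the `general.name` metadata field;
# with the guard added to read_field() above, loading them no longer fails.
model_id = "google/gemma-3-1b-it-qat-q4_0-gguf"  # gated repo, read token required
gguf_file = "gemma-3-1b-it-q4_0.gguf"

tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=gguf_file, torch_dtype=torch.float16)
```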