This commit is contained in:
Marc Sun 2025-07-02 17:03:51 -04:00 committed by GitHub
commit e4582ff16e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 10 additions and 16 deletions

View File

@ -363,3 +363,9 @@ class Bnb4BitHfQuantizer(HfQuantizer):
model, self.modules_to_not_convert, quantization_config=self.quantization_config model, self.modules_to_not_convert, quantization_config=self.quantization_config
) )
return model return model
@property
def is_compileable(self) -> bool:
    """Whether the installed `bitsandbytes` release is at least 0.46.0.

    Returns:
        `True` when the installed `bitsandbytes` version is greater than or
        equal to 0.46.0, `False` otherwise.
    """
    # Compatible with PyTorch 2.4+ for fullgraph=False.
    # Requires PyTorch 2.8 nightly for fullgraph=True.
    installed_bnb = version.parse(importlib.metadata.version("bitsandbytes"))
    minimum_bnb = version.parse("0.46.0")
    return installed_bnb >= minimum_bnb

View File

@ -314,3 +314,7 @@ class Bnb8BitHfQuantizer(HfQuantizer):
model, self.modules_to_not_convert, quantization_config=self.quantization_config model, self.modules_to_not_convert, quantization_config=self.quantization_config
) )
return model return model
@property
def is_compileable(self) -> bool:
    """Tell whether the installed `bitsandbytes` package is version 0.46.0 or newer."""
    # Look up the version string of the installed distribution first, then
    # compare it as a parsed version rather than as raw text.
    bnb_release = importlib.metadata.version("bitsandbytes")
    return version.parse(bnb_release) >= version.parse("0.46.0")

View File

@ -831,11 +831,3 @@ class Bnb4bitCompile(unittest.TestCase):
max_new_tokens=10, max_new_tokens=10,
cache_implementation="static", cache_implementation="static",
) )
with self.assertRaises(Exception):
# overwrite property
object.__setattr__(self.model_4bit.hf_quantizer, "is_compileable", True)
self.model_4bit.generate(
input_ids=encoded_input["input_ids"].to(self.model_4bit.device),
max_new_tokens=10,
cache_implementation="static",
)

View File

@ -1005,11 +1005,3 @@ class Bnb8bitCompile(unittest.TestCase):
max_new_tokens=10, max_new_tokens=10,
cache_implementation="static", cache_implementation="static",
) )
with self.assertRaises(Exception):
object.__setattr__(self.model_8bit.hf_quantizer, "is_compileable", True)
self.model_8bit.generate(
input_ids=encoded_input["input_ids"].to(self.model_8bit.device),
max_new_tokens=10,
cache_implementation="static",
)