Fix 4090/ada not detected as having FP8 support (#37067)

Signed-off-by: Qubitium <qubitium@modelcloud.ai>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Qubitium-ModelCloud authored on 2025-03-31 16:53:48 +08:00; committed by GitHub
parent 2b4734bd49
commit 4705b04c74

@@ -52,9 +52,10 @@ class FineGrainedFP8HfQuantizer(HfQuantizer):
         compute_capability = torch.cuda.get_device_capability()
         major, minor = compute_capability
-        if major < 9:
+        if (major < 8) or (major == 8 and minor < 9):
             raise ValueError(
-                "FP8 quantized models is only supported on GPUs with compute capability >= 9.0 (e.g H100)"
+                "FP8 quantized models is only supported on GPUs with compute capability >= 8.9 (e.g 4090/H100)"
+                f", actual = `{major}.{minor}`"
             )
         device_map = kwargs.get("device_map", None)
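The old gate required compute capability 9.0 (Hopper), which wrongly rejected Ada GPUs such as the RTX 4090 at capability 8.9, the first consumer architecture with FP8 tensor cores. Below is a minimal sketch of the new gate, runnable without a GPU; supports_fp8 is a hypothetical helper named here for illustration, not part of transformers, but its threshold logic mirrors the condition introduced above:

def supports_fp8(major: int, minor: int) -> bool:
    # FP8 requires compute capability 8.9 (Ada) or higher (Hopper is 9.0).
    # This mirrors the rejection condition from the commit, inverted.
    return not ((major < 8) or (major == 8 and minor < 9))

# Ampere A100 is 8.0, Ada RTX 4090 is 8.9, Hopper H100 is 9.0.
assert not supports_fp8(8, 0)  # A100: still rejected
assert supports_fp8(8, 9)      # RTX 4090: accepted after this fix
assert supports_fp8(9, 0)      # H100: accepted, as before

Equivalently, Python's tuple comparison expresses the same threshold directly: (major, minor) >= (8, 9).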