Fix 4090/ada not detected as having FP8 support (#37067)

Signed-off-by: Qubitium <qubitium@modelcloud.ai>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Qubitium-ModelCloud authored on 2025-03-31 16:53:48 +08:00; committed by GitHub
parent 2b4734bd49
commit 4705b04c74

@@ -52,9 +52,10 @@ class FineGrainedFP8HfQuantizer(HfQuantizer):
         compute_capability = torch.cuda.get_device_capability()
         major, minor = compute_capability
-        if major < 9:
+        if (major < 8) or (major == 8 and minor < 9):
             raise ValueError(
-                "FP8 quantized models is only supported on GPUs with compute capability >= 9.0 (e.g H100)"
+                "FP8 quantized models is only supported on GPUs with compute capability >= 8.9 (e.g 4090/H100)"
+                f", actual = `{major}.{minor}`"
             )
         device_map = kwargs.get("device_map", None)
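The old gate required compute capability 9.0 (Hopper), which wrongly rejected Ada GPUs such as the RTX 4090 at capability 8.9, the first consumer architecture with FP8 tensor cores. Below is a minimal sketch of the new gate, runnable without a GPU; supports_fp8 is a hypothetical helper named here for illustration, not part of transformers, but its threshold logic mirrors the condition introduced above:

def supports_fp8(major: int, minor: int) -> bool:
    # FP8 requires compute capability 8.9 (Ada) or higher (Hopper is 9.0).
    # This mirrors the rejection condition from the commit, inverted.
    return not ((major < 8) or (major == 8 and minor < 9))

# Ampere A100 is 8.0, Ada RTX 4090 is 8.9, Hopper H100 is 9.0.
assert not supports_fp8(8, 0)  # A100: still rejected
assert supports_fp8(8, 9)      # RTX 4090: accepted after this fix
assert supports_fp8(9, 0)      # H100: accepted, as before

Equivalently, Python's tuple comparison expresses the same threshold directly: (major, minor) >= (8, 9).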