Fix --bf16 option support for Neuron after PR #22300 (#22307)

This PR fixes the "RuntimeError: No CUDA GPUs are available" error
raised when running with the --bf16 option on Neuron.

Related PRs:
https://github.com/huggingface/transformers/pull/20684
https://github.com/huggingface/transformers/pull/22300
This commit is contained in:
jeffhataws 2023-03-23 09:27:13 -07:00 committed by GitHub
parent aef488c503
commit ec9b18f62d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -588,7 +588,12 @@ class Trainer:
if args.fp16 or args.bf16:
if args.half_precision_backend == "auto":
if args.device == torch.device("cpu"):
if is_torch_neuroncore_available():
if args.fp16:
raise ValueError("Tried to use `fp16` but this option is not yet supported on Neuron.")
else:
args.half_precision_backend = "cpu_amp"
elif args.device == torch.device("cpu"):
if args.fp16:
raise ValueError("Tried to use `fp16` but it is not supported on cpu")
elif _is_native_cpu_amp_available: