From ec9b18f62d9bc399192bb44fecdbb93ca9ee1b1e Mon Sep 17 00:00:00 2001
From: jeffhataws <56947987+jeffhataws@users.noreply.github.com>
Date: Thu, 23 Mar 2023 09:27:13 -0700
Subject: [PATCH] Fix --bf16 option support for Neuron after PR #22300 (#22307)

This PR fixes the "RuntimeError: No CUDA GPUs are available" error raised
when running with the --bf16 option on Neuron.

Related PRs:
https://github.com/huggingface/transformers/pull/20684
https://github.com/huggingface/transformers/pull/22300
---
 src/transformers/trainer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index a41d43edeb4..7267d79b3c8 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -588,7 +588,12 @@ class Trainer:
 
         if args.fp16 or args.bf16:
             if args.half_precision_backend == "auto":
-                if args.device == torch.device("cpu"):
+                if is_torch_neuroncore_available():
+                    if args.fp16:
+                        raise ValueError("Tried to use `fp16` but this option is not yet supported on Neuron.")
+                    else:
+                        args.half_precision_backend = "cpu_amp"
+                elif args.device == torch.device("cpu"):
                     if args.fp16:
                         raise ValueError("Tried to use `fp16` but it is not supported on cpu")
                     elif _is_native_cpu_amp_available:
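
Note: for readers following along, here is a minimal standalone sketch of the
backend-selection precedence this patch establishes. The function
select_half_precision_backend and its boolean flags are hypothetical
stand-ins for illustration only; in the real Trainer the checks are
is_torch_neuroncore_available(), args.device == torch.device("cpu") (which
additionally consults _is_native_cpu_amp_available, elided here for brevity),
and the default CUDA branch.

# Hypothetical sketch of the patched precedence, not the actual Trainer code.
def select_half_precision_backend(fp16: bool, on_neuron: bool, on_cpu: bool) -> str:
    """Neuron is checked before the CPU and CUDA branches, so Neuron devices
    (which report no CUDA GPUs) resolve to "cpu_amp" instead of "cuda_amp"."""
    if on_neuron:
        if fp16:
            raise ValueError("Tried to use `fp16` but this option is not yet supported on Neuron.")
        return "cpu_amp"   # bf16 on Neuron routes through the CPU AMP path
    if on_cpu:
        if fp16:
            raise ValueError("Tried to use `fp16` but it is not supported on cpu")
        return "cpu_amp"   # real code first verifies native CPU AMP is available
    return "cuda_amp"      # only non-Neuron, non-CPU devices reach the CUDA path

if __name__ == "__main__":
    # Before the patch, a Neuron device fell through to "cuda_amp", whose setup
    # probes for GPUs and raised "RuntimeError: No CUDA GPUs are available".
    assert select_half_precision_backend(fp16=False, on_neuron=True, on_cpu=False) == "cpu_amp"
    assert select_half_precision_backend(fp16=False, on_neuron=False, on_cpu=False) == "cuda_amp"
    print("backend selection matches the patched precedence")

The design point is ordering: the Neuron check must come before the
args.device comparison, because a Neuron/XLA device is neither the CPU device
nor a CUDA device, so without the new branch it landed in the CUDA AMP path.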