remove the obsolete code related to fairscale FSDP (#26651)

* remove the obsolete code related to fairscale FSDP

* apply review suggestion
Hz, Ji 2023-10-30 19:55:03 +08:00 committed by GitHub
parent 5fbed2d7ca
commit d751dbecb2
2 changed files with 1 addition and 10 deletions

src/transformers/trainer.py

@@ -1850,12 +1850,6 @@ class Trainer:
                     if is_sagemaker_mp_enabled() and args.fp16:
                         self.optimizer.clip_master_grads(args.max_grad_norm)
-                    elif hasattr(self.optimizer, "clip_grad_norm"):
-                        # Some optimizers (like the sharded optimizer) have a specific way to do gradient clipping
-                        self.optimizer.clip_grad_norm(args.max_grad_norm)
-                    elif hasattr(model, "clip_grad_norm_"):
-                        # Some models (like FullyShardedDDP) have a specific way to do gradient clipping
-                        model.clip_grad_norm_(args.max_grad_norm)
                     elif self.use_apex:
                         # Revert to normal clipping otherwise, handling Apex or full precision
                         nn.utils.clip_grad_norm_(
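
For illustration (not part of the commit): with the fairscale `FullyShardedDDP` branch removed, a plain full-precision run clips gradients via the stock PyTorch utility that the remaining `elif` chain falls through to. A minimal self-contained sketch of that path, using a stand-in model and optimizer (`max_grad_norm` mirrors `args.max_grad_norm`):

```python
import torch
import torch.nn as nn

model = nn.Linear(10, 2)                          # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
max_grad_norm = 1.0                               # mirrors args.max_grad_norm

loss = model(torch.randn(4, 10)).sum()
loss.backward()
# The standard PyTorch clipping call that non-fairscale runs fall through to.
nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
optimizer.step()
optimizer.zero_grad()
```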

src/transformers/training_args.py

@@ -1384,10 +1384,7 @@ class TrainingArguments:
         if self.bf16:
             if self.half_precision_backend == "apex":
-                raise ValueError(
-                    " `--half_precision_backend apex`: GPU bf16 is not supported by apex. Use"
-                    " `--half_precision_backend cuda_amp` instead"
-                )
+                raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.")
 
         if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU:
             if self.evaluation_strategy == IntervalStrategy.NO:
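
For illustration, a hypothetical standalone version of the tightened check (the real validation lives in `TrainingArguments.__post_init__`; `validate_half_precision_backend` is not a transformers function):

```python
def validate_half_precision_backend(bf16: bool, half_precision_backend: str) -> None:
    # Simplified error: apex has no bf16 support, and the message no longer
    # suggests the removed `cuda_amp` alternative.
    if bf16 and half_precision_backend == "apex":
        raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.")


validate_half_precision_backend(bf16=False, half_precision_backend="apex")  # passes
try:
    validate_half_precision_backend(bf16=True, half_precision_backend="apex")
except ValueError as err:
    print(err)  # bf16 + apex is rejected outright
```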