Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-31 02:02:21 +06:00.
Don't accidentally mutate the base_model_tp_plan (#36677)
* Don't accidentally mutate the base_model_tp_plan
* Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
* Trigger tests
* Marking grad accum test as slow
* Add a flaky decorator
* Add a flaky decorator
* Use cyril's codeblock
* Don't copy() when it's None
* Use cyril's new codeblock
* make fixup
parent 071a161d3e
commit c7eb95581a
src/transformers/modeling_utils.py:

```diff
@@ -1895,9 +1895,15 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
         # If current model is a base model, attach `base_model_tp_plan` and `base_model_pp_plan` from config
         if self.base_model is self:
-            self._pp_plan = self.config.base_model_pp_plan
-            self._tp_plan = self._tp_plan or self.config.base_model_tp_plan or {}
+            self._pp_plan = (
+                self.config.base_model_pp_plan.copy() if self.config.base_model_pp_plan is not None else None
+            )
+            self._tp_plan = self.config.base_model_tp_plan.copy() if self.config.base_model_tp_plan is not None else {}
         else:
             self._tp_plan = self._tp_plan or {}
             for name, module in self.named_children():
                 if plan := getattr(module, "_tp_plan", None):
                     self._tp_plan.update({f"{name}.{k}": v for k, v in plan.items()})
+
+        for name, module in self.named_children():
+            if plan := getattr(module, "_tp_plan", None):
+                self._tp_plan.update({f"{name}.{k}": v for k, v in plan.items()})
```
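The `.copy()` calls are the core of the fix: the old code stored a reference to the dict owned by the config, so the `update()` loop silently mutated `config.base_model_tp_plan` as well. A minimal sketch of that aliasing bug, using plain dicts in place of the real config and model objects:

```python
# Toy illustration of the aliasing bug fixed above (plain dicts,
# not the real PretrainedConfig / PreTrainedModel objects).

base_model_tp_plan = {"layers.*.self_attn.q_proj": "colwise"}  # owned by the config

# Old behavior: the model aliases the config's dict instead of copying it.
tp_plan = base_model_tp_plan

# post_init then merges submodule plans into the model's plan in place...
tp_plan.update({"lm_head": "colwise_rep"})

# ...which corrupts the config's plan too, since both names share one dict.
assert "lm_head" in base_model_tp_plan

# New behavior: copy first, guarding against None (the "Don't copy() when
# it's None" entry in the commit message), so the config stays pristine.
base_model_tp_plan = {"layers.*.self_attn.q_proj": "colwise"}
tp_plan = base_model_tp_plan.copy() if base_model_tp_plan is not None else {}
tp_plan.update({"lm_head": "colwise_rep"})
assert "lm_head" not in base_model_tp_plan
```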
tests/generation/test_utils.py:

```diff
@@ -2305,6 +2305,7 @@ class GenerationTesterMixin:
         self.assertEqual(with_all_logits.tolist(), without_all_logits.tolist())
 
     @pytest.mark.generate
+    @is_flaky
     def test_assisted_decoding_with_logits_to_keep(self):
         for model_class in self.all_generative_model_classes:
             if "logits_to_keep" not in set(inspect.signature(model_class.forward).parameters.keys()):
```
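`@is_flaky` is the `transformers.testing_utils` helper that reruns a failing test a few times before letting the failure surface, which keeps a nondeterministic assisted-decoding test from breaking CI. A simplified sketch of the underlying retry pattern (hypothetical `flaky_retry`, not the library's actual implementation):

```python
import functools
import time


def flaky_retry(max_attempts: int = 5, wait_before_retry: float = 0.0):
    """Rerun a failing test; only the final attempt's exception propagates.

    Simplified sketch of a flaky-test decorator; the real is_flaky in
    transformers.testing_utils has its own signature and options.
    """

    def decorator(test_func):
        @functools.wraps(test_func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_attempts):
                try:
                    return test_func(*args, **kwargs)
                except Exception:
                    if attempt == max_attempts - 1:
                        raise  # out of retries: report the real failure
                    if wait_before_retry:
                        time.sleep(wait_before_retry)

        return wrapper

    return decorator


# Usage: retry a nondeterministic test up to 3 times.
# @flaky_retry(max_attempts=3)
# def test_sometimes_flaky(self): ...
```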
tests/trainer/test_trainer.py:

```diff
@@ -803,6 +803,7 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
         trainer.train()
         self.check_trained_model(trainer.model, alternate_seed=True)
 
+    @slow
     def test_gradient_accumulation_loss_alignment_with_model_loss(self):
         set_seed(42)
         import datasets
```
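`@slow` is the `transformers.testing_utils` marker that keeps expensive tests out of the default test run; they execute only when requested via the `RUN_SLOW` environment variable. A condensed sketch of that gate (same spirit as the library's helper, not its exact code):

```python
import os
import unittest

# Slow tests run only when RUN_SLOW is set to a truthy value
# (condensed version of the gate in transformers.testing_utils).
_run_slow_tests = os.environ.get("RUN_SLOW", "false").lower() in {"1", "true", "yes"}


def slow(test_case):
    """Skip the decorated test unless slow tests were explicitly enabled."""
    return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case)


class ExampleTests(unittest.TestCase):
    @slow
    def test_expensive_training_run(self):
        # Stand-in body; a real slow test would train or load a large model.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()  # skipped by default; enable with RUN_SLOW=1
```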