commit 9946d55ecf
Author: Marc Sun
Date: 2025-07-02 10:50:40 -07:00 (committed by GitHub)


@@ -4431,10 +4431,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMixin):
"`tp_plan` and `device_map` are mutually exclusive. Choose either one for parallelization."
)
# If torchrun was used, make sure to TP by default. This way people don't need to change tp or device map
if device_map == "auto" and tp_plan is None and int(os.environ.get("WORLD_SIZE", 0)):
tp_plan = "auto" # device_map = "auto" in torchrun equivalent to TP plan = AUTO!
device_map = None
if device_map == "auto" and int(os.environ.get("WORLD_SIZE", 0)):
logger.info(
"You've set device_map=`auto` while triggering a distributed run with torchrun. This might lead to unexpected behavior. "
"If your plan is to load the model on each device, you should set device_map={"
": PartialState().process_index} where PartialState comes from accelerate library"
)
        # We need to correctly dispatch the model on the current process device. The easiest way for this is to use a simple
        # `device_map` pointing to the correct device
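
For reference, a minimal sketch (not part of the commit) of the per-process loading pattern the new log message recommends; the script name and checkpoint are placeholders:

    # sketch.py -- launch with: torchrun --nproc_per_node=2 sketch.py
    from accelerate import PartialState
    from transformers import AutoModelForCausalLM

    state = PartialState()  # wraps the rank/device of the current torchrun process
    model = AutoModelForCausalLM.from_pretrained(
        "your-org/your-model",  # placeholder checkpoint name
        device_map={"": state.process_index},  # place the whole model on this process's device
    )

Alternatively, passing tp_plan="auto" explicitly keeps the tensor-parallel behavior that the removed branch used to apply implicitly when device_map="auto" was combined with torchrun.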