mirror of https://github.com/huggingface/transformers.git
better place
This commit is contained in:
parent 131587cd12
commit 9302204772
@@ -4306,6 +4306,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMixin):
                 "`tp_plan` and `device_map` are mutually exclusive. Choose either one for parallelization."
             )
 
+        if device_map == "auto" and int(os.environ.get("WORLD_SIZE", 0)):
+            logger.info("You've set device_map=`auto` while triggering a distributed run with torchrun. This might lead to unexpected behavior. "
+                        'If your plan is to load the model on each device, you should set device_map={"": PartialState().process_index} where PartialState comes from accelerate library')
+
         # We need to correctly dispatch the model on the current process device. The easiest way for this is to use a simple
         # `device_map` pointing to the correct device
         if tp_plan is not None:
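For reference, this is the loading pattern the new log message points users to: a minimal sketch (checkpoint and script names are placeholders, not from this commit) of giving each torchrun process its own full copy of the model instead of relying on device_map="auto".

# Hypothetical sketch of the per-process loading pattern suggested by the log message.
# Launch with: torchrun --nproc_per_node=2 load_per_device.py
from accelerate import PartialState
from transformers import AutoModelForCausalLM

# Map the whole model ("" = root module) onto this process's device index,
# so every rank loads a complete copy on its own GPU.
device_map = {"": PartialState().process_index}

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",  # placeholder checkpoint
    device_map=device_map,
)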
@@ -4317,10 +4321,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMixin):
                 raise ValueError("device_mesh must be 1 dimensional and will be used for TP")
             device_map = torch.device(device_mesh.device_type, int(os.environ["LOCAL_RANK"]))
 
-        if device_map == "auto" and int(os.environ.get("WORLD_SIZE", 0)):
-            logger.info("You've set device_map=`auto` while triggering a distributed run with torchrun. This might lead to unexpected behavior. "
-                        'If your plan is to load the model on each device, you should set device_map={"": PartialState().process_index} where PartialState comes from accelerate library')
 
         if use_auth_token is not None:
             warnings.warn(
                 "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
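The context lines in this second hunk are the tensor-parallel path, where `device_map` is derived from the device mesh and `LOCAL_RANK` rather than passed by the user. As a hedged usage sketch (the checkpoint name is a placeholder), this is how that path is typically exercised: `tp_plan` is given instead of `device_map`, and torchrun supplies the rank environment variables the code above reads.

# Hypothetical sketch: tensor-parallel loading via tp_plan, as an alternative to device_map.
# Launch with: torchrun --nproc_per_node=4 tp_load.py
import os

from transformers import AutoModelForCausalLM

# tp_plan and device_map are mutually exclusive (see the ValueError in the first hunk);
# with tp_plan, each rank's device comes from LOCAL_RANK, which torchrun sets.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",  # placeholder checkpoint
    tp_plan="auto",
)
print(f"rank {os.environ.get('LOCAL_RANK')}: model loaded with tensor parallelism")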