mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 10:12:23 +06:00
oob performance improvement for cpu DDP (#18595)
* oob performance improvement for cpu DDP Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> * add is_psutil_available check Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
c3be98ebab
commit
cdde85a0a0
@ -39,6 +39,7 @@ from .utils import (
|
||||
ccl_version,
|
||||
get_full_repo_name,
|
||||
is_accelerate_available,
|
||||
is_psutil_available,
|
||||
is_sagemaker_dp_enabled,
|
||||
is_sagemaker_mp_enabled,
|
||||
is_torch_available,
|
||||
@ -1342,6 +1343,21 @@ class TrainingArguments:
|
||||
"Looks like distributed multinode run but MASTER_ADDR env not set, "
|
||||
"please try exporting rank 0's hostname as MASTER_ADDR"
|
||||
)
|
||||
if (
|
||||
torch.get_num_threads() == 1
|
||||
and get_int_from_env(["OMP_NUM_THREADS", "MKL_NUM_THREADS"], 0) == 0
|
||||
and is_psutil_available()
|
||||
):
|
||||
import psutil
|
||||
|
||||
num_cpu_threads_per_process = int(psutil.cpu_count(logical=False) / local_size)
|
||||
if num_cpu_threads_per_process == 0:
|
||||
num_cpu_threads_per_process = 1
|
||||
torch.set_num_threads(num_cpu_threads_per_process)
|
||||
logger.info(
|
||||
f"num_cpu_threads_per_process unset, we set it at {num_cpu_threads_per_process} to improve oob"
|
||||
" performance."
|
||||
)
|
||||
torch.distributed.init_process_group(backend=self.xpu_backend, rank=rank, world_size=size)
|
||||
elif is_torch_tpu_available():
|
||||
device = xm.xla_device()
|
||||
|
Loading…
Reference in New Issue
Block a user