device-agnostic deepspeed testing (#27342)
Parent: 9999b73968
Commit: c5d7754b11
@@ -38,17 +38,18 @@ from transformers.testing_utils import (
     CaptureStderr,
     LoggingLevel,
     TestCasePlus,
+    backend_device_count,
     execute_subprocess_async,
-    get_gpu_count,
     mockenv_context,
     require_deepspeed,
     require_optuna,
-    require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_accelerator,
+    require_torch_multi_accelerator,
     slow,
+    torch_device,
 )
 from transformers.trainer_utils import get_last_checkpoint, set_seed
-from transformers.utils import SAFE_WEIGHTS_NAME, is_torch_bf16_gpu_available
+from transformers.utils import SAFE_WEIGHTS_NAME, is_torch_bf16_available_on_device
 
 
 if is_torch_available():
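The import swap above is the core of the commit: CUDA-only helpers (get_gpu_count, require_torch_gpu, require_torch_multi_gpu, is_torch_bf16_gpu_available) are replaced with backend-neutral ones that take the active torch_device into account. A minimal sketch of how the new helpers compose, using only names visible in this diff (the test class and test name here are hypothetical, for illustration only):

from transformers.testing_utils import (
    TestCasePlus,
    backend_device_count,
    require_torch_accelerator,
    torch_device,  # string name of the active backend, e.g. "cuda" or "cpu"
)


@require_torch_accelerator  # skips unless some accelerator backend is available
class DeviceAgnosticExampleTest(TestCasePlus):  # hypothetical class for illustration
    def test_has_device(self):
        # backend_device_count works for any backend, unlike the CUDA-only get_gpu_count()
        self.assertGreaterEqual(backend_device_count(torch_device), 1)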
@@ -125,7 +126,7 @@ def get_launcher(distributed=False):
     # - it won't be able to handle that
     # 2. for now testing with just 2 gpus max (since some quality tests may give different
     # results with mode gpus because we use very little data)
-    num_gpus = min(2, get_gpu_count()) if distributed else 1
+    num_gpus = min(2, backend_device_count(torch_device)) if distributed else 1
     master_port = get_master_port(real_launcher=True)
     return f"deepspeed --num_nodes 1 --num_gpus {num_gpus} --master_port {master_port}".split()
 
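In get_launcher(), the device count is fed straight into the deepspeed launcher command. A standalone sketch of the counting logic under the same assumptions (the port value is a placeholder; the real code derives it from get_master_port, which lives in the surrounding test file):

from transformers.testing_utils import backend_device_count, torch_device

distributed = True
# cap at 2 devices: with very little data, quality tests can diverge on more ranks
num_devices = min(2, backend_device_count(torch_device)) if distributed else 1
master_port = 10999  # placeholder; the test suite computes this via get_master_port()
launcher = f"deepspeed --num_nodes 1 --num_gpus {num_devices} --master_port {master_port}".split()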
@@ -145,7 +146,7 @@ optims = [HF_OPTIM, DS_OPTIM]
 schedulers = [HF_SCHEDULER, DS_SCHEDULER]
 
 stages = [ZERO2, ZERO3]
-if is_torch_bf16_gpu_available():
+if is_torch_bf16_available_on_device(torch_device):
     dtypes = [FP16, BF16]
 else:
     dtypes = [FP16]
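The dtype matrix is gated the same way: bf16 variants are only generated when the active device reports bfloat16 support. A hedged sketch, with FP16/BF16 as stand-ins for the constants defined earlier in the test file:

from transformers.testing_utils import torch_device
from transformers.utils import is_torch_bf16_available_on_device

FP16, BF16 = "fp16", "bf16"  # stand-ins for the test file's constants
if is_torch_bf16_available_on_device(torch_device):
    dtypes = [FP16, BF16]
else:
    dtypes = [FP16]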
@@ -165,7 +166,7 @@ params_with_optims_and_schedulers = list(itertools.product(stages, dtypes, optim
 
 
 @require_deepspeed
-@require_torch_gpu
+@require_torch_accelerator
 class CoreIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
     """
     Testing non-Trainer DeepSpeed integration
@@ -273,7 +274,7 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
 
 
 @require_deepspeed
-@require_torch_gpu
+@require_torch_accelerator
 class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, TrainerIntegrationCommon):
     """
 
@@ -875,7 +876,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
 
 @slow
 @require_deepspeed
-@require_torch_gpu
+@require_torch_accelerator
 class TestDeepSpeedWithLauncher(TestCasePlus):
     """This class is for testing via an external script - can do multiple gpus"""
 
@@ -896,7 +897,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
     #
 
     @parameterized.expand(params, name_func=parameterized_custom_name_func)
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_basic_distributed(self, stage, dtype):
         self.run_and_check(stage=stage, dtype=dtype, distributed=True)
 
@@ -927,7 +928,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
         )
 
     @parameterized.expand(params, name_func=parameterized_custom_name_func)
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_fp32_distributed(self, stage, dtype):
         # real model needs too much GPU memory under stage2+fp32, so using tiny random model here -
         # therefore no quality checks, just basic completion checks are done
@@ -968,9 +969,9 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
         self.do_checks(output_dir, do_train=do_train, do_eval=do_eval)
 
     @parameterized.expand(["bf16", "fp16", "fp32"])
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_inference(self, dtype):
-        if dtype == "bf16" and not is_torch_bf16_gpu_available():
+        if dtype == "bf16" and not is_torch_bf16_available_on_device(torch_device):
             self.skipTest("test requires bfloat16 hardware support")
 
         # this is just inference, so no optimizer should be loaded
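Besides gating the parameter matrix, the same predicate guards individual tests at runtime, as in test_inference above. A sketch of the skip pattern in isolation (the class and test names are hypothetical):

import unittest

from transformers.testing_utils import torch_device
from transformers.utils import is_torch_bf16_available_on_device


class InferenceDtypeTest(unittest.TestCase):  # hypothetical stand-in for the real class
    def test_inference_bf16(self):
        if not is_torch_bf16_available_on_device(torch_device):
            self.skipTest("test requires bfloat16 hardware support")
        # ... run inference in bf16 on torch_device ...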