mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-05 22:00:09 +06:00

* init * first working test * added todo for setup.py * working test for single node multi node ddp and smd * added tensorflow single node test * added directory for pytorch and tensorflow due to different requirements.txt * added directory for pytorch and tensorflow * added comment for run_glue until it is available * added output_dir to it * smaller dataset to make test running faster * adjust HP and script * adjusted parameter for tensorflow * refactored test scripts * adjusted make file * init * first working test * added todo for setup.py * working test for single node multi node ddp and smd * added tensorflow single node test * added directory for pytorch and tensorflow due to different requirements.txt * added directory for pytorch and tensorflow * added comment for run_glue until it is available * added output_dir to it * smaller dataset to make test running faster * adjust HP and script * adjusted parameter for tensorflow * refactored test scripts * adjusted make file * updated dlc container * commented in all tests * added both ecr images * added new master branches * debug * added new datasets version * init * strange rebase bug * removed changes * changed min version for tests to work * updated DLC * added model parallel test * removed test files * removed test files * tested with ned dlc * added correct sagemaker sdk version * adjust DLCs for official one * reworked tests * quality * removed default profile added documentation to it * added step in release for sagemaker tests * reverted version for example script removed duplicated script and added install from master to requirements.txt * removed mistaken .DS_Stores from mac * fixed tests * added Sylvains feedback * make style * added lysandre's feedback
66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
# We define a fixture function below; tests "use" it simply by
# referencing its name.
|
|
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from attr import dataclass
|
|
|
|
|
|
# Force the default AWS region to us-east-1 for this process (overwrites any value already set).
os.environ.update(AWS_DEFAULT_REGION="us-east-1")
|
|
|
|
|
|
@dataclass
class SageMakerTestEnvironment:
    """Settings shared by the SageMaker tests for a single framework.

    ``framework`` is the only annotated field and is supplied per instance.
    ``role``, ``hyperparameters`` and ``distributed_hyperparameters`` carry no
    annotation, so they remain plain class attributes shared by all instances;
    treat the dictionaries as read-only.

    Every property compares ``framework`` with ``"pytorch"`` and falls back to
    the TensorFlow variant for any other value.
    """

    framework: str
    # IAM role ARN handed to the training jobs.
    role = "arn:aws:iam::558105141721:role/sagemaker_execution_role"
    # Base hyperparameters for the training script.
    hyperparameters = {
        "task_name": "mnli",
        "per_device_train_batch_size": 32,
        "per_device_eval_batch_size": 32,
        "do_train": True,
        "do_eval": True,
        "do_predict": True,
        "output_dir": "/opt/ml/model",
        "overwrite_output_dir": True,
        "max_steps": 500,
        "save_steps": 5500,
    }
    # Same settings as above, except that ``max_steps`` is raised to 1000.
    distributed_hyperparameters = {**hyperparameters, "max_steps": 1000}

    @property
    def metric_definitions(self) -> list:
        """Return the metric definitions for ``framework``.

        Each entry is a dict with a ``"Name"`` and a ``"Regex"`` key. The
        patterns are raw strings so that ``\\D`` reaches the regex engine
        without relying on Python's handling of unknown string escapes.
        """
        if self.framework == "pytorch":
            return [
                {"Name": "train_runtime", "Regex": r"train_runtime.*=\D*(.*?)$"},
                {"Name": "eval_accuracy", "Regex": r"eval_accuracy.*=\D*(.*?)$"},
                {"Name": "eval_loss", "Regex": r"eval_loss.*=\D*(.*?)$"},
            ]
        # NOTE(review): "eval_accuracy" is paired with the ``loss`` pattern and
        # "eval_loss" with the ``sparse_categorical_accuracy`` pattern. This looks
        # swapped; it is left unchanged here because thresholds in the tests may
        # depend on the current mapping -- confirm before changing.
        return [
            {"Name": "train_runtime", "Regex": r"train_runtime.*=\D*(.*?)$"},
            {"Name": "eval_accuracy", "Regex": r"loss.*=\D*(.*?)]?$"},
            {"Name": "eval_loss", "Regex": r"sparse_categorical_accuracy.*=\D*(.*?)]?$"},
        ]

    @property
    def base_job_name(self) -> str:
        """Return the job-name prefix, ``"<framework>-transfromers-test"``."""
        # The spelling "transfromers" is kept as-is: it is a runtime string.
        return f"{self.framework}-transfromers-test"

    @property
    def test_path(self) -> str:
        """Return the relative directory of the test scripts for ``framework``."""
        return f"./tests/sagemaker/scripts/{self.framework}"

    @property
    def image_uri(self) -> str:
        """Return the training container image URI for ``framework``."""
        if self.framework == "pytorch":
            return "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:1.6.0-transformers4.4.2-gpu-py36-cu110-ubuntu18.04"
        return "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-training:2.4.1-transformers4.4.2-gpu-py37-cu110-ubuntu18.04"
|
|
|
|
|
|
@pytest.fixture(scope="class")
def sm_env(request):
    """Attach a ``SageMakerTestEnvironment`` to the requesting test class.

    The environment is built from the class's ``framework`` attribute and
    stored on the class as ``env``.
    """
    test_cls = request.cls
    test_cls.env = SageMakerTestEnvironment(framework=test_cls.framework)
|