Fix no_trainer CI (#18242)

* Fix all tests
Zachary Mueller 2022-07-21 14:44:57 -04:00 committed by GitHub
parent 561b9a8c00
commit 99eb9b523f
2 changed files with 46 additions and 14 deletions


@@ -19,14 +19,14 @@ import json
import logging
import os
import shutil
import subprocess
import sys
import tempfile
from unittest import mock
import torch
from accelerate.utils import write_basic_config
from transformers.testing_utils import TestCasePlus, get_gpu_count, slow, torch_device
from transformers.testing_utils import TestCasePlus, get_gpu_count, run_command, slow, torch_device
from transformers.utils import is_apex_available
@@ -75,6 +75,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdir)
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_glue_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -94,12 +95,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
if is_cuda_and_apex_available():
testargs.append("--fp16")
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.75)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_clm_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -120,12 +122,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
# Skipping because there are not enough batches to train the model + would need a drop_last to work.
return
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertLess(result["perplexity"], 100)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_mlm_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -139,12 +142,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertLess(result["perplexity"], 42)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_ner_no_trainer(self):
# with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
epochs = 7 if get_gpu_count() > 1 else 2
@@ -165,13 +169,14 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.75)
self.assertLess(result["train_loss"], 0.5)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_squad_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -190,7 +195,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
# Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics.
self.assertGreaterEqual(result["eval_f1"], 28)
@@ -198,6 +203,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer")))
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_swag_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -214,12 +220,13 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_accuracy"], 0.8)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer")))
@slow
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_summarization_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -237,7 +244,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_rouge1"], 10)
self.assertGreaterEqual(result["eval_rouge2"], 2)
@@ -247,6 +254,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer")))
@slow
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_translation_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -268,7 +276,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--with_tracking
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_bleu"], 30)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
@@ -292,10 +300,11 @@ class ExamplesTestsNoTrainer(TestCasePlus):
--checkpointing_steps epoch
""".split()
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10)
@mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
def test_run_image_classification_no_trainer(self):
tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
@@ -316,9 +325,9 @@ class ExamplesTestsNoTrainer(TestCasePlus):
if is_cuda_and_apex_available():
testargs.append("--fp16")
_ = subprocess.run(self._launch_args + testargs, stdout=subprocess.PIPE)
run_command(self._launch_args + testargs)
result = get_results(tmp_dir)
# The base model scores a 25%
self.assertGreaterEqual(result["eval_accuracy"], 0.625)
self.assertGreaterEqual(result["eval_accuracy"], 0.6)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1")))
self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer")))


@@ -20,6 +20,7 @@ import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
import unittest
@@ -27,7 +28,7 @@ from collections.abc import Mapping
from distutils.util import strtobool
from io import StringIO
from pathlib import Path
from typing import Iterator, Union
from typing import Iterator, List, Union
from unittest import mock
from transformers import logging as transformers_logging
@@ -1561,3 +1562,25 @@ def to_2tuple(x):
if isinstance(x, collections.abc.Iterable):
return x
return (x, x)
# These utils relate to ensuring the right error message is received when running scripts
class SubprocessCallException(Exception):
pass
def run_command(command: List[str], return_stdout=False):
"""
Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
the error message if an error occurred while running `command`.
"""
try:
output = subprocess.check_output(command, stderr=subprocess.STDOUT)
if return_stdout:
if hasattr(output, "decode"):
output = output.decode("utf-8")
return output
except subprocess.CalledProcessError as e:
raise SubprocessCallException(
f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
) from e
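
A minimal usage sketch of the helper on its own (the commands are arbitrary):

from transformers.testing_utils import SubprocessCallException, run_command

# With return_stdout=True the decoded stdout of a successful command is returned.
out = run_command(["python", "-c", "print('ok')"], return_stdout=True)
assert out.strip() == "ok"

# A failing command raises SubprocessCallException carrying the combined
# stdout/stderr of the subprocess instead of a bare CalledProcessError.
try:
    run_command(["python", "-c", "raise SystemExit(1)"])
except SubprocessCallException as err:
    print(err)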