diff --git a/examples/legacy/seq2seq/old_test_datasets.py b/examples/legacy/seq2seq/old_test_datasets.py
index 6792fcf6ddd..b85d7966e97 100644
--- a/examples/legacy/seq2seq/old_test_datasets.py
+++ b/examples/legacy/seq2seq/old_test_datasets.py
@@ -24,7 +24,7 @@ from parameterized import parameterized
 from save_len_file import save_len_file
 from transformers import AutoTokenizer
 from transformers.models.mbart.modeling_mbart import shift_tokens_right
-from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, slow
 from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset
 
 
@@ -61,7 +61,6 @@ class TestAll(TestCasePlus):
         ],
     )
     @slow
-    @require_torch_non_multi_gpu_but_fix_me
     def test_seq2seq_dataset_truncation(self, tok_name):
         tokenizer = AutoTokenizer.from_pretrained(tok_name)
         tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -101,7 +100,6 @@ class TestAll(TestCasePlus):
             break  # No need to test every batch
 
     @parameterized.expand([BART_TINY, BERT_BASE_CASED])
-    @require_torch_non_multi_gpu_but_fix_me
     def test_legacy_dataset_truncation(self, tok):
         tokenizer = AutoTokenizer.from_pretrained(tok)
         tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -126,7 +124,6 @@ class TestAll(TestCasePlus):
             assert max_len_target > trunc_target  # Truncated
             break  # No need to test every batch
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_pack_dataset(self):
         tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")
 
@@ -145,7 +142,6 @@ class TestAll(TestCasePlus):
         assert orig_paths == new_paths
 
     @pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq")
-    @require_torch_non_multi_gpu_but_fix_me
     def test_dynamic_batch_size(self):
         if not FAIRSEQ_AVAILABLE:
             return
@@ -170,7 +166,6 @@ class TestAll(TestCasePlus):
         if failures:
             raise AssertionError(f"too many tokens in {len(failures)} batches")
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_sortish_sampler_reduces_padding(self):
         ds, _, tokenizer = self._get_dataset(max_len=512)
         bs = 2
@@ -210,7 +205,6 @@ class TestAll(TestCasePlus):
         )
         return ds, max_tokens, tokenizer
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_distributed_sortish_sampler_splits_indices_between_procs(self):
         ds, max_tokens, tokenizer = self._get_dataset()
         ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False))
@@ -226,7 +220,6 @@ class TestAll(TestCasePlus):
             PEGASUS_XSUM,
         ],
     )
-    @require_torch_non_multi_gpu_but_fix_me
     def test_dataset_kwargs(self, tok_name):
         tokenizer = AutoTokenizer.from_pretrained(tok_name, use_fast=False)
         if tok_name == MBART_TINY:
diff --git a/examples/legacy/seq2seq/old_test_tatoeba_conversion.py b/examples/legacy/seq2seq/old_test_tatoeba_conversion.py
index 5747811bddd..b5b7e56f619 100644
--- a/examples/legacy/seq2seq/old_test_tatoeba_conversion.py
+++ b/examples/legacy/seq2seq/old_test_tatoeba_conversion.py
@@ -18,7 +18,7 @@ import unittest
 
 from transformers.file_utils import cached_property
 from transformers.models.marian.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter
-from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow
+from transformers.testing_utils import slow
 
 
 @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.")
@@ -29,12 +29,10 @@ class TatoebaConversionTester(unittest.TestCase):
         return TatoebaConverter(save_dir=tmp_dir)
 
     @slow
-    @require_torch_non_multi_gpu_but_fix_me
     def test_resolver(self):
         self.resolver.convert_models(["heb-eng"])
 
     @slow
-    @require_torch_non_multi_gpu_but_fix_me
     def test_model_card(self):
         content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True)
         assert mmeta["long_pair"] == "heb-eng"
diff --git a/examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py b/examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py
index 10df36b5d8a..22c6f4de06f 100644
--- a/examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py
+++ b/examples/research_projects/bert-loses-patience/test_run_glue_with_pabee.py
@@ -4,7 +4,7 @@ import sys
 from unittest.mock import patch
 
 import run_glue_with_pabee
-from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me
+from transformers.testing_utils import TestCasePlus
 
 
 logging.basicConfig(level=logging.DEBUG)
@@ -20,7 +20,6 @@ def get_setup_file():
 
 
 class PabeeTests(TestCasePlus):
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_glue(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
diff --git a/examples/research_projects/deebert/test_glue_deebert.py b/examples/research_projects/deebert/test_glue_deebert.py
index ce714ff5d26..7a709308e6f 100644
--- a/examples/research_projects/deebert/test_glue_deebert.py
+++ b/examples/research_projects/deebert/test_glue_deebert.py
@@ -1,11 +1,10 @@
 import argparse
 import logging
 import sys
-import unittest
 from unittest.mock import patch
 
 import run_glue_deebert
-from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, get_gpu_count, require_torch_non_multi_gpu, slow
 
 
 logging.basicConfig(level=logging.DEBUG)
@@ -20,17 +19,34 @@ def get_setup_file():
     return args.f
 
 
-class DeeBertTests(unittest.TestCase):
+class DeeBertTests(TestCasePlus):
     def setup(self) -> None:
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
 
+    def run_and_check(self, args):
+        n_gpu = get_gpu_count()
+
+        if n_gpu > 1:
+            pass
+            # XXX: doesn't quite work with n_gpu > 1 https://github.com/huggingface/transformers/issues/10560
+            # script = f"{self.examples_dir_str}/research_projects/deebert/run_glue_deebert.py"
+            # distributed_args = f"-m torch.distributed.launch --nproc_per_node={n_gpu} {script}".split()
+            # cmd = [sys.executable] + distributed_args + args
+            # execute_subprocess_async(cmd, env=self.get_env())
+            # XXX: test the results - need to save them first into .json file
+        else:
+            args.insert(0, "run_glue_deebert.py")
+            with patch.object(sys, "argv", args):
+                result = run_glue_deebert.main()
+                for value in result.values():
+                    self.assertGreaterEqual(value, 0.666)
+
     @slow
-    @require_torch_non_multi_gpu_but_fix_me
+    @require_torch_non_multi_gpu
     def test_glue_deebert_train(self):
         train_args = """
-            run_glue_deebert.py
             --model_type roberta
             --model_name_or_path roberta-base
             --task_name MRPC
@@ -51,13 +67,9 @@ class DeeBertTests(unittest.TestCase):
             --overwrite_cache
             --eval_after_first_stage
             """.split()
-        with patch.object(sys, "argv", train_args):
-            result = run_glue_deebert.main()
-            for value in result.values():
-                self.assertGreaterEqual(value, 0.666)
+        self.run_and_check(train_args)
 
         eval_args = """
-            run_glue_deebert.py
             --model_type roberta
             --model_name_or_path ./examples/deebert/saved_models/roberta-base/MRPC/two_stage
             --task_name MRPC
@@ -72,13 +84,9 @@ class DeeBertTests(unittest.TestCase):
             --overwrite_cache
             --per_gpu_eval_batch_size=1
             """.split()
-        with patch.object(sys, "argv", eval_args):
-            result = run_glue_deebert.main()
-            for value in result.values():
-                self.assertGreaterEqual(value, 0.666)
+        self.run_and_check(eval_args)
 
         entropy_eval_args = """
-            run_glue_deebert.py
             --model_type roberta
             --model_name_or_path ./examples/deebert/saved_models/roberta-base/MRPC/two_stage
             --task_name MRPC
@@ -93,7 +101,4 @@ class DeeBertTests(unittest.TestCase):
             --overwrite_cache
             --per_gpu_eval_batch_size=1
             """.split()
-        with patch.object(sys, "argv", entropy_eval_args):
-            result = run_glue_deebert.main()
-            for value in result.values():
-                self.assertGreaterEqual(value, 0.666)
+        self.run_and_check(entropy_eval_args)
diff --git a/examples/research_projects/rag/test_distributed_retriever.py b/examples/research_projects/rag/test_distributed_retriever.py
index 8865a309895..ac54d1f9857 100644
--- a/examples/research_projects/rag/test_distributed_retriever.py
+++ b/examples/research_projects/rag/test_distributed_retriever.py
@@ -17,7 +17,7 @@ from transformers.integrations import is_ray_available
 from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
 from transformers.models.rag.retrieval_rag import CustomHFIndex, RagRetriever
 from transformers.models.roberta.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
-from transformers.testing_utils import require_ray, require_torch_non_multi_gpu_but_fix_me
+from transformers.testing_utils import require_ray
 
 
 sys.path.append(os.path.join(os.getcwd()))  # noqa: E402 # noqa: E402 # isort:skip
@@ -265,7 +265,6 @@ class RagRetrieverTest(TestCase):
         self.assertEqual(doc_dicts[1]["id"][0], "0")  # max inner product is reached with first doc
         self.assertListEqual(doc_ids.tolist(), [[1], [0]])
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_pytorch_distributed_retriever_retrieve(self):
         n_docs = 1
         hidden_states = np.array(
@@ -276,7 +275,6 @@ class RagRetrieverTest(TestCase):
             self.get_dummy_pytorch_distributed_retriever(init_retrieval=True), hidden_states, n_docs
         )
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_custom_hf_index_pytorch_retriever_retrieve(self):
         n_docs = 1
         hidden_states = np.array(
@@ -289,7 +287,6 @@ class RagRetrieverTest(TestCase):
             n_docs,
         )
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self):
         n_docs = 1
         hidden_states = np.array(
diff --git a/examples/research_projects/seq2seq-distillation/_test_make_student.py b/examples/research_projects/seq2seq-distillation/_test_make_student.py
index ebb54bbfc43..0a1688a95cc 100644
--- a/examples/research_projects/seq2seq-distillation/_test_make_student.py
+++ b/examples/research_projects/seq2seq-distillation/_test_make_student.py
@@ -4,7 +4,7 @@ import unittest
 from make_student import create_student_by_copying_alternating_layers
 from transformers import AutoConfig
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch, require_torch_non_multi_gpu_but_fix_me
+from transformers.testing_utils import require_torch
 
 
 TINY_BART = "sshleifer/bart-tiny-random"
@@ -17,28 +17,23 @@ class MakeStudentTester(unittest.TestCase):
     def teacher_config(self):
         return AutoConfig.from_pretrained(TINY_BART)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_valid_t5(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1)
         self.assertEqual(student.config.num_hidden_layers, 1)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_asymmetric_t5(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_same_decoder_small_encoder(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None)
         self.assertEqual(student.config.encoder_layers, 1)
         self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_small_enc_small_dec(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1)
         self.assertEqual(student.config.encoder_layers, 1)
         self.assertEqual(student.config.decoder_layers, 1)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_raises_assert(self):
         with self.assertRaises(AssertionError):
             create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None)
diff --git a/examples/test_examples.py b/examples/test_examples.py
index 5d074b22f9b..276364ca915 100644
--- a/examples/test_examples.py
+++ b/examples/test_examples.py
@@ -24,7 +24,7 @@ from unittest.mock import patch
 import torch
 
 from transformers.file_utils import is_apex_available
-from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow, torch_device
+from transformers.testing_utils import TestCasePlus, get_gpu_count, slow, torch_device
 
 
 SRC_DIRS = [
@@ -82,7 +82,6 @@ def is_cuda_and_apex_available():
 
 
 class ExamplesTests(TestCasePlus):
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_glue(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -114,7 +113,6 @@ class ExamplesTests(TestCasePlus):
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_accuracy"], 0.75)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_clm(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -147,7 +145,6 @@ class ExamplesTests(TestCasePlus):
         result = get_results(tmp_dir)
         self.assertLess(result["perplexity"], 100)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_mlm(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -174,11 +171,13 @@ class ExamplesTests(TestCasePlus):
         result = get_results(tmp_dir)
         self.assertLess(result["perplexity"], 42)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_ner(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
 
+        # with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
+        epochs = 7 if get_gpu_count() > 1 else 2
+
         tmp_dir = self.get_auto_remove_tmp_dir()
         testargs = f"""
             run_ner.py
@@ -193,7 +192,7 @@ class ExamplesTests(TestCasePlus):
             --learning_rate=2e-4
             --per_device_train_batch_size=2
             --per_device_eval_batch_size=2
-            --num_train_epochs=2
+            --num_train_epochs={epochs}
         """.split()
 
         if torch_device != "cuda":
@@ -206,7 +205,6 @@ class ExamplesTests(TestCasePlus):
         self.assertGreaterEqual(result["eval_precision"], 0.75)
         self.assertLess(result["eval_loss"], 0.5)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_squad(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -235,7 +233,6 @@ class ExamplesTests(TestCasePlus):
         self.assertGreaterEqual(result["f1"], 30)
         self.assertGreaterEqual(result["exact"], 30)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_swag(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -262,7 +259,6 @@ class ExamplesTests(TestCasePlus):
         result = get_results(tmp_dir)
         self.assertGreaterEqual(result["eval_accuracy"], 0.8)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_generation(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -281,7 +277,6 @@ class ExamplesTests(TestCasePlus):
         self.assertGreaterEqual(len(result[0]), 10)
 
     @slow
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_seq2seq_summarization(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -314,7 +309,6 @@ class ExamplesTests(TestCasePlus):
         self.assertGreaterEqual(result["eval_rougeLsum"], 7)
 
     @slow
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_seq2seq_translation(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
diff --git a/examples/test_xla_examples.py b/examples/test_xla_examples.py
index 86c031cea12..ed1458a010f 100644
--- a/examples/test_xla_examples.py
+++ b/examples/test_xla_examples.py
@@ -20,7 +20,7 @@ import unittest
 from time import time
 from unittest.mock import patch
 
-from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, require_torch_tpu
+from transformers.testing_utils import require_torch_tpu
 
 
 logging.basicConfig(level=logging.DEBUG)
@@ -30,7 +30,6 @@ logger = logging.getLogger()
 
 @require_torch_tpu
 class TorchXLAExamplesTests(unittest.TestCase):
-    @require_torch_non_multi_gpu_but_fix_me
     def test_run_glue(self):
         import xla_spawn
 
@@ -82,7 +81,6 @@ class TorchXLAExamplesTests(unittest.TestCase):
         # Assert that the script takes less than 300 seconds to make sure it doesn't hang.
         self.assertLess(end - start, 500)
 
-    @require_torch_non_multi_gpu_but_fix_me
     def test_trainer_tpu(self):
         import xla_spawn
 
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index b2ed86ce291..10a67953cf5 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -301,12 +301,6 @@ def require_torch_non_multi_gpu(test_case):
     return test_case
 
 
-# this is a decorator identical to require_torch_non_multi_gpu, but is used as a quick band-aid to
-# allow all of examples to be run multi-gpu CI and it reminds us that tests decorated with this one
-# need to be ported and aren't so by design.
-require_torch_non_multi_gpu_but_fix_me = require_torch_non_multi_gpu
-
-
 def require_torch_tpu(test_case):
     """
     Decorator marking a test that requires a TPU (in PyTorch).