diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index 0f608edebb4..e9e17cd6c0d 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -135,7 +135,7 @@ jobs:
         name: run_all_tests_tf_gpu_test_reports
         path: reports

-  run_tests_torch_multiple_gpu:
+  run_tests_torch_multi_gpu:
     runs-on: [self-hosted, multi-gpu]
     steps:
     - uses: actions/checkout@v2
@@ -154,7 +154,7 @@ jobs:
       id: cache
       with:
         path: .env
-        key: v1.1-tests_torch_multiple_gpu-${{ hashFiles('setup.py') }}
+        key: v1.1-tests_torch_multi_gpu-${{ hashFiles('setup.py') }}

     - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
       run: |
@@ -181,11 +181,11 @@ jobs:
         OMP_NUM_THREADS: 1
       run: |
         source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multiple_gpu tests
+        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_torch_multiple_gpu_failures_short.txt
+      run: cat reports/tests_torch_multi_gpu_failures_short.txt

     - name: Test suite reports artifacts
       if: ${{ always() }}
@@ -194,7 +194,7 @@ jobs:
         name: run_all_tests_torch_multi_gpu_test_reports
         path: reports

-  run_tests_tf_multiple_gpu:
+  run_tests_tf_multi_gpu:
     runs-on: [self-hosted, multi-gpu]
     steps:
     - uses: actions/checkout@v2
@@ -213,7 +213,7 @@ jobs:
       id: cache
       with:
         path: .env
-        key: v1.1-tests_tf_multiple_gpu-${{ hashFiles('setup.py') }}
+        key: v1.1-tests_tf_multi_gpu-${{ hashFiles('setup.py') }}

     - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
       run: |
@@ -240,11 +240,11 @@ jobs:
         OMP_NUM_THREADS: 1
       run: |
         source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multiple_gpu tests
+        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_tf_multiple_gpu_failures_short.txt
+      run: cat reports/tests_tf_multi_gpu_failures_short.txt

     - name: Test suite reports artifacts
       if: ${{ always() }}
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 6033a9e4e83..54c126f39f8 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -187,7 +187,7 @@ jobs:
         name: run_all_tests_tf_gpu_test_reports
         path: reports

-  run_all_tests_torch_multiple_gpu:
+  run_all_tests_torch_multi_gpu:
     runs-on: [self-hosted, multi-gpu]
     steps:
     - uses: actions/checkout@v2
@@ -238,11 +238,11 @@ jobs:
         RUN_SLOW: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multiple_gpu tests
+        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_torch_multiple_gpu_failures_short.txt
+      run: cat reports/tests_torch_multi_gpu_failures_short.txt

     - name: Run examples tests on multi-GPU
       env:
@@ -250,11 +250,11 @@ jobs:
         RUN_SLOW: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples
+        python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multi_gpu examples

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/examples_torch_multiple_gpu_failures_short.txt
+      run: cat reports/examples_torch_multi_gpu_failures_short.txt

     - name: Run all pipeline tests on multi-GPU
       if: ${{ always() }}
@@ -265,11 +265,11 @@ jobs:
         RUN_PIPELINE_TESTS: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multiple_gpu tests
+        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_torch_pipeline_multiple_gpu_failures_short.txt
+      run: cat reports/tests_torch_pipeline_multi_gpu_failures_short.txt

     - name: Test suite reports artifacts
       if: ${{ always() }}
@@ -278,7 +278,7 @@ jobs:
         name: run_all_tests_torch_multi_gpu_test_reports
         path: reports

-  run_all_tests_tf_multiple_gpu:
+  run_all_tests_tf_multi_gpu:
     runs-on: [self-hosted, multi-gpu]
     steps:
     - uses: actions/checkout@v2
@@ -329,11 +329,11 @@ jobs:
         RUN_SLOW: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multiple_gpu tests
+        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_tf_multiple_gpu_failures_short.txt
+      run: cat reports/tests_tf_multi_gpu_failures_short.txt

     - name: Run all pipeline tests on multi-GPU
       if: ${{ always() }}
@@ -344,11 +344,11 @@ jobs:
         RUN_PIPELINE_TESTS: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_multiple_gpu tests
+        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_multi_gpu tests

     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_tf_multiple_gpu_pipelines_failures_short.txt
+      run: cat reports/tests_tf_multi_gpu_pipelines_failures_short.txt

     - name: Test suite reports artifacts
       if: ${{ always() }}
diff --git a/docs/source/testing.rst b/docs/source/testing.rst
index 9da69365dcc..b16c77ee244 100644
--- a/docs/source/testing.rst
+++ b/docs/source/testing.rst
@@ -405,32 +405,32 @@ decorators are used to set the requirements of tests CPU/GPU/TPU-wise:

 * ``require_torch`` - this test will run only under torch
 * ``require_torch_gpu`` - as ``require_torch`` plus requires at least 1 GPU
-* ``require_torch_multigpu`` - as ``require_torch`` plus requires at least 2 GPUs
-* ``require_torch_non_multigpu`` - as ``require_torch`` plus requires 0 or 1 GPUs
+* ``require_torch_multi_gpu`` - as ``require_torch`` plus requires at least 2 GPUs
+* ``require_torch_non_multi_gpu`` - as ``require_torch`` plus requires 0 or 1 GPUs
 * ``require_torch_tpu`` - as ``require_torch`` plus requires at least 1 TPU

 Let's depict the GPU requirements in the following table:

-+----------+---------------------------------+
-| n gpus   | decorator                       |
-+==========+=================================+
-| ``>= 0`` | ``@require_torch``              |
-+----------+---------------------------------+
-| ``>= 1`` | ``@require_torch_gpu``          |
-+----------+---------------------------------+
-| ``>= 2`` | ``@require_torch_multigpu``     |
-+----------+---------------------------------+
-| ``< 2``  | ``@require_torch_non_multigpu`` |
-+----------+---------------------------------+
++----------+----------------------------------+
+| n gpus   | decorator                        |
++==========+==================================+
+| ``>= 0`` | ``@require_torch``               |
++----------+----------------------------------+
+| ``>= 1`` | ``@require_torch_gpu``           |
++----------+----------------------------------+
+| ``>= 2`` | ``@require_torch_multi_gpu``     |
++----------+----------------------------------+
+| ``< 2``  | ``@require_torch_non_multi_gpu`` |
++----------+----------------------------------+

 For example, here is a test that must be run only when there are 2 or more GPUs available and pytorch is
 installed:

 .. code-block:: python

-   @require_torch_multigpu
-   def test_example_with_multigpu():
+   @require_torch_multi_gpu
+   def test_example_with_multi_gpu():

 If a test requires ``tensorflow`` use the ``require_tf`` decorator. For example:
@@ -454,7 +454,7 @@ last for them to work correctly. Here is an example of the correct usage:
 .. code-block:: python

    @parameterized.expand(...)
-   @require_torch_multigpu
+   @require_torch_multi_gpu
    def test_integration_foo():

 This order problem doesn't exist with ``@pytest.mark.parametrize``, you can put it first or last and it will still
diff --git a/examples/bert-loses-patience/test_run_glue_with_pabee.py b/examples/bert-loses-patience/test_run_glue_with_pabee.py
index eaac5329379..10df36b5d8a 100644
--- a/examples/bert-loses-patience/test_run_glue_with_pabee.py
+++ b/examples/bert-loses-patience/test_run_glue_with_pabee.py
@@ -4,7 +4,7 @@ import sys
 from unittest.mock import patch

 import run_glue_with_pabee
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me


 logging.basicConfig(level=logging.DEBUG)
@@ -20,7 +20,7 @@ def get_setup_file():


 class PabeeTests(TestCasePlus):
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_glue(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
diff --git a/examples/deebert/test_glue_deebert.py b/examples/deebert/test_glue_deebert.py
index 66faa557c0d..ce714ff5d26 100644
--- a/examples/deebert/test_glue_deebert.py
+++ b/examples/deebert/test_glue_deebert.py
@@ -5,7 +5,7 @@ import unittest
 from unittest.mock import patch

 import run_glue_deebert
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow


 logging.basicConfig(level=logging.DEBUG)
@@ -26,7 +26,7 @@ class DeeBertTests(unittest.TestCase):
         logger.addHandler(stream_handler)

     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_glue_deebert_train(self):
         train_args = """
diff --git a/examples/rag/test_distributed_retriever.py b/examples/rag/test_distributed_retriever.py
index be874c83e8b..f7d1417a64a 100644
--- a/examples/rag/test_distributed_retriever.py
+++ b/examples/rag/test_distributed_retriever.py
@@ -16,7 +16,7 @@ from transformers.configuration_dpr import DPRConfig
 from transformers.configuration_rag import RagConfig
 from transformers.file_utils import is_datasets_available, is_faiss_available, is_psutil_available, is_torch_available
 from transformers.retrieval_rag import CustomHFIndex
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me
 from transformers.tokenization_bart import BartTokenizer
 from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
 from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
@@ -179,7 +179,7 @@ class RagRetrieverTest(TestCase):
         retriever.init_retrieval(port)
         return retriever

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_pytorch_distributed_retriever_retrieve(self):
         n_docs = 1
         retriever = self.get_dummy_pytorch_distributed_retriever(init_retrieval=True)
@@ -195,7 +195,7 @@ class RagRetrieverTest(TestCase):
         self.assertEqual(doc_dicts[1]["id"][0], "0")  # max inner product is reached with first doc
         self.assertListEqual(doc_ids.tolist(), [[1], [0]])

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_custom_hf_index_retriever_retrieve(self):
         n_docs = 1
         retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=False)
@@ -211,7 +211,7 @@ class RagRetrieverTest(TestCase):
         self.assertEqual(doc_dicts[1]["id"][0], "0")  # max inner product is reached with first doc
         self.assertListEqual(doc_ids.tolist(), [[1], [0]])

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self):
         n_docs = 1
         retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=True)
diff --git a/examples/seq2seq/test_bash_script.py b/examples/seq2seq/test_bash_script.py
index fffe6c4be73..bf4302fd9fd 100644
--- a/examples/seq2seq/test_bash_script.py
+++ b/examples/seq2seq/test_bash_script.py
@@ -13,7 +13,7 @@ from distillation import BartSummarizationDistiller, distill_main
 from finetune import SummarizationModule, main
 from transformers import MarianMTModel
 from transformers.file_utils import cached_path
-from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multi_gpu_but_fix_me, slow
 from utils import load_json


@@ -32,7 +32,7 @@ class TestMbartCc25Enro(TestCasePlus):

     @slow
     @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_model_download(self):
         """This warms up the cache so that we can time the next test without including download time, which varies between machines."""
         MarianMTModel.from_pretrained(MARIAN_MODEL)
@@ -40,7 +40,7 @@ class TestMbartCc25Enro(TestCasePlus):
     # @timeout_decorator.timeout(1200)
     @slow
     @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_train_mbart_cc25_enro_script(self):
         env_vars_to_replace = {
             "$MAX_LEN": 64,
@@ -75,7 +75,7 @@ class TestMbartCc25Enro(TestCasePlus):
             --num_sanity_val_steps 0
             --eval_beams 2
         """.split()
-        # XXX: args.gpus > 1 : handle multigpu in the future
+        # XXX: args.gpus > 1 : handle multi_gpu in the future
         testargs = ["finetune.py"] + bash_script.split() + args

         with patch.object(sys, "argv", testargs):
@@ -129,7 +129,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
     @timeout_decorator.timeout(600)
     @slow
     @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_opus_mt_distill_script(self):
         data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
         env_vars_to_replace = {
@@ -172,7 +172,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
         parser = pl.Trainer.add_argparse_args(parser)
         parser = BartSummarizationDistiller.add_model_specific_args(parser, os.getcwd())
         args = parser.parse_args()
-        # assert args.gpus == gpus THIS BREAKS for multigpu
+        # assert args.gpus == gpus THIS BREAKS for multi_gpu

         model = distill_main(args)
diff --git a/examples/seq2seq/test_datasets.py b/examples/seq2seq/test_datasets.py
index 625b6da347d..4c3814580c4 100644
--- a/examples/seq2seq/test_datasets.py
+++ b/examples/seq2seq/test_datasets.py
@@ -11,7 +11,7 @@ from save_len_file import save_len_file
 from test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir
 from transformers import AutoTokenizer
 from transformers.modeling_bart import shift_tokens_right
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow
 from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset


@@ -30,7 +30,7 @@ class TestAll(TestCasePlus):
         ],
     )
     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_seq2seq_dataset_truncation(self, tok_name):
         tokenizer = AutoTokenizer.from_pretrained(tok_name)
         tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -70,7 +70,7 @@ class TestAll(TestCasePlus):
             break  # No need to test every batch

     @parameterized.expand([BART_TINY, BERT_BASE_CASED])
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_legacy_dataset_truncation(self, tok):
         tokenizer = AutoTokenizer.from_pretrained(tok)
         tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -95,7 +95,7 @@ class TestAll(TestCasePlus):
             assert max_len_target > trunc_target  # Truncated
             break  # No need to test every batch

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_pack_dataset(self):
         tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")

@@ -114,7 +114,7 @@ class TestAll(TestCasePlus):
         assert orig_paths == new_paths

     @pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq")
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_dynamic_batch_size(self):
         if not FAIRSEQ_AVAILABLE:
             return
@@ -139,7 +139,7 @@ class TestAll(TestCasePlus):
         if failures:
             raise AssertionError(f"too many tokens in {len(failures)} batches")

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_sortish_sampler_reduces_padding(self):
         ds, _, tokenizer = self._get_dataset(max_len=512)
         bs = 2
@@ -179,7 +179,7 @@ class TestAll(TestCasePlus):
         )
         return ds, max_tokens, tokenizer

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_distributed_sortish_sampler_splits_indices_between_procs(self):
         ds, max_tokens, tokenizer = self._get_dataset()
         ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False))
@@ -195,7 +195,7 @@ class TestAll(TestCasePlus):
             PEGASUS_XSUM,
         ],
     )
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_dataset_kwargs(self, tok_name):
         tokenizer = AutoTokenizer.from_pretrained(tok_name)
         if tok_name == MBART_TINY:
diff --git a/examples/seq2seq/test_fsmt_bleu_score.py b/examples/seq2seq/test_fsmt_bleu_score.py
index 2be6b7d5285..2d5df03c9c1 100644
--- a/examples/seq2seq/test_fsmt_bleu_score.py
+++ b/examples/seq2seq/test_fsmt_bleu_score.py
@@ -22,7 +22,7 @@ from transformers import FSMTForConditionalGeneration, FSMTTokenizer
 from transformers.testing_utils import (
     get_tests_dir,
     require_torch,
-    require_torch_non_multigpu_but_fix_me,
+    require_torch_non_multi_gpu_but_fix_me,
     slow,
     torch_device,
 )
@@ -54,7 +54,7 @@ class ModelEvalTester(unittest.TestCase):
         ]
     )
     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_bleu_scores(self, pair, min_bleu_score):
         # note: this test is not testing the best performance since it only evals a small batch
         # but it should be enough to detect a regression in the output quality
diff --git a/examples/seq2seq/test_make_student.py b/examples/seq2seq/test_make_student.py
index 28b5672f0e3..ebb54bbfc43 100644
--- a/examples/seq2seq/test_make_student.py
+++ b/examples/seq2seq/test_make_student.py
@@ -4,7 +4,7 @@ import unittest
 from make_student import create_student_by_copying_alternating_layers
 from transformers import AutoConfig
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch, require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import require_torch, require_torch_non_multi_gpu_but_fix_me


 TINY_BART = "sshleifer/bart-tiny-random"
@@ -17,28 +17,28 @@ class MakeStudentTester(unittest.TestCase):
     def teacher_config(self):
         return AutoConfig.from_pretrained(TINY_BART)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_valid_t5(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1)
         self.assertEqual(student.config.num_hidden_layers, 1)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_asymmetric_t5(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_same_decoder_small_encoder(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None)
         self.assertEqual(student.config.encoder_layers, 1)
         self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_small_enc_small_dec(self):
         student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1)
         self.assertEqual(student.config.encoder_layers, 1)
         self.assertEqual(student.config.decoder_layers, 1)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_raises_assert(self):
         with self.assertRaises(AssertionError):
             create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None)
diff --git a/examples/seq2seq/test_seq2seq_examples.py b/examples/seq2seq/test_seq2seq_examples.py
index 9afa6ab0f20..a2b306032e9 100644
--- a/examples/seq2seq/test_seq2seq_examples.py
+++ b/examples/seq2seq/test_seq2seq_examples.py
@@ -24,7 +24,7 @@ from transformers.testing_utils import (
     CaptureStdout,
     TestCasePlus,
     require_torch_gpu,
-    require_torch_non_multigpu_but_fix_me,
+    require_torch_non_multi_gpu_but_fix_me,
     slow,
 )
 from utils import ROUGE_KEYS, label_smoothed_nll_loss, lmap, load_json
@@ -133,7 +133,7 @@ class TestSummarizationDistiller(TestCasePlus):

     @slow
     @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_hub_configs(self):
         """I put require_torch_gpu cause I only want this to run with self-scheduled."""

@@ -151,12 +151,12 @@ class TestSummarizationDistiller(TestCasePlus):
             failures.append(m)
         assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_distill_no_teacher(self):
         updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True)
         self._test_distiller_cli(updates)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_distill_checkpointing_with_teacher(self):
         updates = dict(
             student_encoder_layers=2,
@@ -181,7 +181,7 @@ class TestSummarizationDistiller(TestCasePlus):
         convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
         assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_loss_fn(self):
         model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
         input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
@@ -202,7 +202,7 @@ class TestSummarizationDistiller(TestCasePlus):
             # TODO: understand why this breaks
             self.assertEqual(nll_loss, model_computed_loss)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_distill_mbart(self):
         updates = dict(
             student_encoder_layers=2,
@@ -227,7 +227,7 @@ class TestSummarizationDistiller(TestCasePlus):
         assert len(all_files) > 2
         self.assertEqual(len(transformer_ckpts), 2)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_distill_t5(self):
         updates = dict(
             student_encoder_layers=1,
@@ -309,21 +309,21 @@ class TestTheRest(TestCasePlus):

     # test one model to quickly (no-@slow) catch simple problems and do an
     # extensive testing of functionality with multiple models as @slow separately
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_eval(self):
         self.run_eval_tester(T5_TINY)

     # any extra models should go into the list here - can be slow
     @parameterized.expand([BART_TINY, MBART_TINY])
     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_eval_slow(self, model):
         self.run_eval_tester(model)

     # testing with 2 models to validate: 1. translation (t5) 2. summarization (mbart)
     @parameterized.expand([T5_TINY, MBART_TINY])
     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_eval_search(self, model):
         input_file_name = Path(self.get_auto_remove_tmp_dir()) / "utest_input.source"
         output_file_name = input_file_name.parent / "utest_output.txt"
@@ -374,7 +374,7 @@ class TestTheRest(TestCasePlus):
     @parameterized.expand(
         [T5_TINY, BART_TINY, MBART_TINY, MARIAN_TINY, FSMT_TINY],
     )
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_finetune(self, model):
         args_d: dict = CHEAP_ARGS.copy()
         task = "translation" if model in [MBART_TINY, MARIAN_TINY, FSMT_TINY] else "summarization"
@@ -426,7 +426,7 @@ class TestTheRest(TestCasePlus):
         assert isinstance(example_batch, dict)
         assert len(example_batch) >= 4

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_finetune_extra_model_args(self):
         args_d: dict = CHEAP_ARGS.copy()

@@ -477,7 +477,7 @@ class TestTheRest(TestCasePlus):
             model = main(args)
         assert str(excinfo.value) == f"model config doesn't have a `{unsupported_param}` attribute"

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_finetune_lr_schedulers(self):
         args_d: dict = CHEAP_ARGS.copy()
diff --git a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
index 69b979fa01a..eafa7e37fe2 100644
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -8,7 +8,7 @@ from transformers.testing_utils import (
     execute_subprocess_async,
     get_gpu_count,
     require_torch_gpu,
-    require_torch_multigpu,
+    require_torch_multi_gpu,
     slow,
 )

@@ -21,8 +21,8 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus):
     def setUpClass(cls):
         return cls

-    @require_torch_multigpu
-    def test_multigpu(self):
+    @require_torch_multi_gpu
+    def test_multi_gpu(self):
         updates = dict(
             no_teacher=True,
diff --git a/examples/seq2seq/test_tatoeba_conversion.py b/examples/seq2seq/test_tatoeba_conversion.py
index 4f97eca133c..22adc6b2e62 100644
--- a/examples/seq2seq/test_tatoeba_conversion.py
+++ b/examples/seq2seq/test_tatoeba_conversion.py
@@ -4,7 +4,7 @@ import unittest

 from transformers.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow


 @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.")
@@ -15,12 +15,12 @@ class TatoebaConversionTester(unittest.TestCase):
         return TatoebaConverter(save_dir=tmp_dir)

     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_resolver(self):
         self.resolver.convert_models(["heb-eng"])

     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_model_card(self):
         content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True)
         assert mmeta["long_pair"] == "heb-eng"
diff --git a/examples/test_examples.py b/examples/test_examples.py
index e2c0ae42dea..f5651f66487 100644
--- a/examples/test_examples.py
+++ b/examples/test_examples.py
@@ -23,7 +23,7 @@ from unittest.mock import patch
 import torch

 from transformers.file_utils import is_apex_available
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, torch_device
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, torch_device


 SRC_DIRS = [
@@ -67,7 +67,7 @@ def is_cuda_and_apex_available():


 class ExamplesTests(TestCasePlus):
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_glue(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -100,7 +100,7 @@ class ExamplesTests(TestCasePlus):
         for value in result.values():
             self.assertGreaterEqual(value, 0.75)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_pl_glue(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -138,7 +138,7 @@ class ExamplesTests(TestCasePlus):
         #     self.assertGreaterEqual(v, 0.75, f"({k})")
         #

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_clm(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -170,7 +170,7 @@ class ExamplesTests(TestCasePlus):
         result = run_clm.main()
         self.assertLess(result["perplexity"], 100)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_mlm(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -196,7 +196,7 @@ class ExamplesTests(TestCasePlus):
         result = run_mlm.main()
         self.assertLess(result["perplexity"], 42)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_ner(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -227,7 +227,7 @@ class ExamplesTests(TestCasePlus):
         self.assertGreaterEqual(result["eval_precision"], 0.75)
         self.assertLess(result["eval_loss"], 0.5)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_squad(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -256,7 +256,7 @@ class ExamplesTests(TestCasePlus):
         self.assertGreaterEqual(result["f1"], 25)
         self.assertGreaterEqual(result["exact"], 21)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_generation(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
diff --git a/examples/test_xla_examples.py b/examples/test_xla_examples.py
index f8026554b73..86c031cea12 100644
--- a/examples/test_xla_examples.py
+++ b/examples/test_xla_examples.py
@@ -20,7 +20,7 @@ import unittest
 from time import time
 from unittest.mock import patch

-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, require_torch_tpu
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, require_torch_tpu


 logging.basicConfig(level=logging.DEBUG)
@@ -30,7 +30,7 @@ logger = logging.getLogger()

 @require_torch_tpu
 class TorchXLAExamplesTests(unittest.TestCase):
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_glue(self):
         import xla_spawn

@@ -82,7 +82,7 @@ class TorchXLAExamplesTests(unittest.TestCase):
         # Assert that the script takes less than 300 seconds to make sure it doesn't hang.
         self.assertLess(end - start, 500)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_trainer_tpu(self):
         import xla_spawn
diff --git a/examples/token-classification/test_ner_examples.py b/examples/token-classification/test_ner_examples.py
index d8ba83983ff..4a9e176f33e 100644
--- a/examples/token-classification/test_ner_examples.py
+++ b/examples/token-classification/test_ner_examples.py
@@ -4,7 +4,7 @@ import unittest
 from unittest.mock import patch

 import run_ner_old as run_ner
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow


 logging.basicConfig(level=logging.INFO)
@@ -14,7 +14,7 @@ logger = logging.getLogger()

 class ExamplesTests(unittest.TestCase):
     @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_ner(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
@@ -35,7 +35,7 @@ class ExamplesTests(unittest.TestCase):
         result = run_ner.main()
         self.assertLess(result["eval_loss"], 1.5)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
     def test_run_ner_pl(self):
         stream_handler = logging.StreamHandler(sys.stdout)
         logger.addHandler(stream_handler)
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 6a4942d5075..cbce4e96547 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -193,13 +193,13 @@ def require_tokenizers(test_case):
     return test_case


-def require_torch_multigpu(test_case):
+def require_torch_multi_gpu(test_case):
     """
     Decorator marking a test that requires a multi-GPU setup (in PyTorch).

     These tests are skipped on a machine without multiple GPUs.

-    To run *only* the multigpu tests, assuming all test names contain multigpu: $ pytest -sv ./tests -k "multigpu"
+    To run *only* the multi_gpu tests, assuming all test names contain multi_gpu: $ pytest -sv ./tests -k "multi_gpu"
     """
     if not _torch_available:
         return unittest.skip("test requires PyTorch")(test_case)
@@ -212,7 +212,7 @@ def require_torch_multigpu(test_case):
     return test_case


-def require_torch_non_multigpu(test_case):
+def require_torch_non_multi_gpu(test_case):
     """
     Decorator marking a test that requires 0 or 1 GPU setup (in PyTorch).
     """
@@ -227,10 +227,10 @@ def require_torch_non_multigpu(test_case):
     return test_case


-# this is a decorator identical to require_torch_non_multigpu, but is used as a quick band-aid to
+# this is a decorator identical to require_torch_non_multi_gpu, but is used as a quick band-aid to
 # allow all of examples to be run multi-gpu CI and it reminds us that tests decorated with this one
 # need to be ported and aren't so by design.
-require_torch_non_multigpu_but_fix_me = require_torch_non_multigpu
+require_torch_non_multi_gpu_but_fix_me = require_torch_non_multi_gpu


 def require_torch_tpu(test_case):
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 7435a6a1f5a..4de6d3f62d6 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -23,7 +23,7 @@ from typing import List, Tuple

 from transformers import is_torch_available
 from transformers.file_utils import WEIGHTS_NAME
-from transformers.testing_utils import require_torch, require_torch_multigpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device


 if is_torch_available():
@@ -928,8 +928,8 @@ class ModelTesterMixin:
         with torch.no_grad():
             model(**inputs)[0]

-    @require_torch_multigpu
-    def test_multigpu_data_parallel_forward(self):
+    @require_torch_multi_gpu
+    def test_multi_gpu_data_parallel_forward(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

         # some params shouldn't be scattered by nn.DataParallel
diff --git a/tests/test_modeling_rag.py b/tests/test_modeling_rag.py
index fe8fefde4d8..ec8fe1b508a 100644
--- a/tests/test_modeling_rag.py
+++ b/tests/test_modeling_rag.py
@@ -29,7 +29,7 @@ from transformers.testing_utils import (
     require_sentencepiece,
     require_tokenizers,
     require_torch,
-    require_torch_non_multigpu,
+    require_torch_non_multi_gpu,
     slow,
     torch_device,
 )
@@ -581,7 +581,7 @@ class RagDPRT5Test(RagTestMixin, unittest.TestCase):
 @require_retrieval
 @require_sentencepiece
 @require_tokenizers
-@require_torch_non_multigpu
+@require_torch_non_multi_gpu
 class RagModelIntegrationTests(unittest.TestCase):
     @cached_property
     def sequence_model(self):
diff --git a/tests/test_modeling_reformer.py b/tests/test_modeling_reformer.py
index 2d5884cd75e..3a92e3b81ce 100644
--- a/tests/test_modeling_reformer.py
+++ b/tests/test_modeling_reformer.py
@@ -20,7 +20,7 @@ from transformers.testing_utils import (
     require_sentencepiece,
     require_tokenizers,
     require_torch,
-    require_torch_multigpu,
+    require_torch_multi_gpu,
     slow,
     torch_device,
 )
@@ -562,8 +562,8 @@ class ReformerTesterMixin:
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)

-    @require_torch_multigpu
-    def test_multigpu_data_parallel_forward(self):
+    @require_torch_multi_gpu
+    def test_multi_gpu_data_parallel_forward(self):
         # Opt-out of this test.
         pass
diff --git a/tests/test_modeling_transfo_xl.py b/tests/test_modeling_transfo_xl.py
index 7f6478e3a7e..ce199f2da8e 100644
--- a/tests/test_modeling_transfo_xl.py
+++ b/tests/test_modeling_transfo_xl.py
@@ -17,7 +17,7 @@ import random
 import unittest

 from transformers import is_torch_available
-from transformers.testing_utils import require_torch, require_torch_multigpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device

 from .test_configuration_common import ConfigTester
 from .test_generation_utils import GenerationTesterMixin
@@ -205,8 +205,8 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
         output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
         self.model_tester.check_transfo_xl_lm_head_output(output_result)

-    @require_torch_multigpu
-    def test_multigpu_data_parallel_forward(self):
+    @require_torch_multi_gpu
+    def test_multi_gpu_data_parallel_forward(self):
         # Opt-out of this test.
         pass
diff --git a/tests/test_trainer_distributed.py b/tests/test_trainer_distributed.py
index 511bddd15cb..73aa2e69258 100644
--- a/tests/test_trainer_distributed.py
+++ b/tests/test_trainer_distributed.py
@@ -2,7 +2,7 @@ import sys
 from typing import Dict

 from transformers import EvalPrediction, HfArgumentParser, TrainingArguments, is_torch_available
-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multigpu
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multi_gpu
 from transformers.utils import logging


@@ -44,7 +44,7 @@ if is_torch_available():


 class TestTrainerDistributed(TestCasePlus):
-    @require_torch_multigpu
+    @require_torch_multi_gpu
     def test_trainer(self):
         distributed_args = f"""
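
Usage sketch (not part of the patch above; the test class and test names below are hypothetical) showing how the renamed decorators are meant to be applied, per the updated table in docs/source/testing.rst and the docstring in src/transformers/testing_utils.py:

    # hypothetical test module illustrating the renamed decorators
    import unittest

    from transformers.testing_utils import require_torch_multi_gpu, require_torch_non_multi_gpu


    class ExampleGpuRequirementTests(unittest.TestCase):
        @require_torch_multi_gpu
        def test_foo_multi_gpu(self):
            # skipped unless torch sees 2 or more GPUs; keeping "multi_gpu" in the
            # test name lets `pytest -sv ./tests -k "multi_gpu"` select only these
            pass

        @require_torch_non_multi_gpu
        def test_bar(self):
            # skipped on machines with 2 or more GPUs (runs with 0 or 1 GPU)
            pass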