using multi_gpu consistently (#8446)

* s|multiple_gpu|multi_gpu|g; s|multigpu|multi_gpu|g'

* doc
Stas Bekman 2020-11-10 10:23:58 -08:00 committed by GitHub
parent b93569457f
commit 02bdfc0251
22 changed files with 117 additions and 117 deletions
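
The rename was applied mechanically via the substitutions quoted in the commit message (s|multiple_gpu|multi_gpu|g and s|multigpu|multi_gpu|g). As an illustration only — this script is not part of the commit — an equivalent repo-wide rename could be done with a few lines of Python:

```python
# Hypothetical helper, not part of the commit: the commit itself was produced
# with equivalent regex substitutions applied across the tree.
import re
from pathlib import Path

SUBSTITUTIONS = [
    (re.compile(r"multiple_gpu"), "multi_gpu"),
    (re.compile(r"multigpu"), "multi_gpu"),
]


def rename_in_tree(root=".", patterns=("*.py", "*.rst", "*.yml")):
    """Apply the substitutions in place to every matching file under root."""
    for pattern in patterns:
        for path in Path(root).rglob(pattern):
            text = path.read_text(encoding="utf-8")
            new_text = text
            for regex, replacement in SUBSTITUTIONS:
                new_text = regex.sub(replacement, new_text)
            if new_text != text:
                path.write_text(new_text, encoding="utf-8")


if __name__ == "__main__":
    rename_in_tree()
```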

View File

@@ -135,7 +135,7 @@ jobs:
 name: run_all_tests_tf_gpu_test_reports
 path: reports
-run_tests_torch_multiple_gpu:
+run_tests_torch_multi_gpu:
 runs-on: [self-hosted, multi-gpu]
 steps:
 - uses: actions/checkout@v2
@@ -154,7 +154,7 @@ jobs:
 id: cache
 with:
 path: .env
-key: v1.1-tests_torch_multiple_gpu-${{ hashFiles('setup.py') }}
+key: v1.1-tests_torch_multi_gpu-${{ hashFiles('setup.py') }}
 - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
 run: |
@@ -181,11 +181,11 @@ jobs:
 OMP_NUM_THREADS: 1
 run: |
 source .env/bin/activate
-python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multiple_gpu tests
+python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/tests_torch_multiple_gpu_failures_short.txt
+run: cat reports/tests_torch_multi_gpu_failures_short.txt
 - name: Test suite reports artifacts
 if: ${{ always() }}
@@ -194,7 +194,7 @@ jobs:
 name: run_all_tests_torch_multi_gpu_test_reports
 path: reports
-run_tests_tf_multiple_gpu:
+run_tests_tf_multi_gpu:
 runs-on: [self-hosted, multi-gpu]
 steps:
 - uses: actions/checkout@v2
@@ -213,7 +213,7 @@ jobs:
 id: cache
 with:
 path: .env
-key: v1.1-tests_tf_multiple_gpu-${{ hashFiles('setup.py') }}
+key: v1.1-tests_tf_multi_gpu-${{ hashFiles('setup.py') }}
 - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
 run: |
@@ -240,11 +240,11 @@ jobs:
 OMP_NUM_THREADS: 1
 run: |
 source .env/bin/activate
-python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multiple_gpu tests
+python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/tests_tf_multiple_gpu_failures_short.txt
+run: cat reports/tests_tf_multi_gpu_failures_short.txt
 - name: Test suite reports artifacts
 if: ${{ always() }}

View File

@@ -187,7 +187,7 @@ jobs:
 name: run_all_tests_tf_gpu_test_reports
 path: reports
-run_all_tests_torch_multiple_gpu:
+run_all_tests_torch_multi_gpu:
 runs-on: [self-hosted, multi-gpu]
 steps:
 - uses: actions/checkout@v2
@@ -238,11 +238,11 @@ jobs:
 RUN_SLOW: yes
 run: |
 source .env/bin/activate
-python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multiple_gpu tests
+python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/tests_torch_multiple_gpu_failures_short.txt
+run: cat reports/tests_torch_multi_gpu_failures_short.txt
 - name: Run examples tests on multi-GPU
 env:
@@ -250,11 +250,11 @@ jobs:
 RUN_SLOW: yes
 run: |
 source .env/bin/activate
-python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples
+python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multi_gpu examples
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/examples_torch_multiple_gpu_failures_short.txt
+run: cat reports/examples_torch_multi_gpu_failures_short.txt
 - name: Run all pipeline tests on multi-GPU
 if: ${{ always() }}
@@ -265,11 +265,11 @@ jobs:
 RUN_PIPELINE_TESTS: yes
 run: |
 source .env/bin/activate
-python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multiple_gpu tests
+python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/tests_torch_pipeline_multiple_gpu_failures_short.txt
+run: cat reports/tests_torch_pipeline_multi_gpu_failures_short.txt
 - name: Test suite reports artifacts
 if: ${{ always() }}
@@ -278,7 +278,7 @@ jobs:
 name: run_all_tests_torch_multi_gpu_test_reports
 path: reports
-run_all_tests_tf_multiple_gpu:
+run_all_tests_tf_multi_gpu:
 runs-on: [self-hosted, multi-gpu]
 steps:
 - uses: actions/checkout@v2
@@ -329,11 +329,11 @@ jobs:
 RUN_SLOW: yes
 run: |
 source .env/bin/activate
-python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multiple_gpu tests
+python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/tests_tf_multiple_gpu_failures_short.txt
+run: cat reports/tests_tf_multi_gpu_failures_short.txt
 - name: Run all pipeline tests on multi-GPU
 if: ${{ always() }}
@@ -344,11 +344,11 @@ jobs:
 RUN_PIPELINE_TESTS: yes
 run: |
 source .env/bin/activate
-python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_multiple_gpu tests
+python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_multi_gpu tests
 - name: Failure short reports
 if: ${{ always() }}
-run: cat reports/tests_tf_multiple_gpu_pipelines_failures_short.txt
+run: cat reports/tests_tf_multi_gpu_pipelines_failures_short.txt
 - name: Test suite reports artifacts
 if: ${{ always() }}

View File

@@ -405,32 +405,32 @@ decorators are used to set the requirements of tests CPU/GPU/TPU-wise:
 * ``require_torch`` - this test will run only under torch
 * ``require_torch_gpu`` - as ``require_torch`` plus requires at least 1 GPU
-* ``require_torch_multigpu`` - as ``require_torch`` plus requires at least 2 GPUs
-* ``require_torch_non_multigpu`` - as ``require_torch`` plus requires 0 or 1 GPUs
+* ``require_torch_multi_gpu`` - as ``require_torch`` plus requires at least 2 GPUs
+* ``require_torch_non_multi_gpu`` - as ``require_torch`` plus requires 0 or 1 GPUs
 * ``require_torch_tpu`` - as ``require_torch`` plus requires at least 1 TPU
 Let's depict the GPU requirements in the following table:
-+----------+---------------------------------+
-| n gpus   | decorator                       |
-+==========+=================================+
-| ``>= 0`` | ``@require_torch``              |
-+----------+---------------------------------+
-| ``>= 1`` | ``@require_torch_gpu``          |
-+----------+---------------------------------+
-| ``>= 2`` | ``@require_torch_multigpu``     |
-+----------+---------------------------------+
-| ``< 2``  | ``@require_torch_non_multigpu`` |
-+----------+---------------------------------+
++----------+----------------------------------+
+| n gpus   | decorator                        |
++==========+==================================+
+| ``>= 0`` | ``@require_torch``               |
++----------+----------------------------------+
+| ``>= 1`` | ``@require_torch_gpu``           |
++----------+----------------------------------+
+| ``>= 2`` | ``@require_torch_multi_gpu``     |
++----------+----------------------------------+
+| ``< 2``  | ``@require_torch_non_multi_gpu`` |
++----------+----------------------------------+
 For example, here is a test that must be run only when there are 2 or more GPUs available and pytorch is installed:
 .. code-block:: python
-@require_torch_multigpu
-def test_example_with_multigpu():
+@require_torch_multi_gpu
+def test_example_with_multi_gpu():
 If a test requires ``tensorflow`` use the ``require_tf`` decorator. For example:
@@ -454,7 +454,7 @@ last for them to work correctly. Here is an example of the correct usage:
 .. code-block:: python
 @parameterized.expand(...)
-@require_torch_multigpu
+@require_torch_multi_gpu
 def test_integration_foo():
 This order problem doesn't exist with ``@pytest.mark.parametrize``, you can put it first or last and it will still
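
To make the documented rules concrete, here is a small, hypothetical test module (not part of this commit) that uses the renamed decorator and follows the ordering rule described above:

```python
# Hypothetical test module, not from the commit: shows the renamed decorator
# and the ordering rule from the docs (parameterized.expand first, the skip
# decorator last, i.e. closest to the function).
import unittest

import torch
from parameterized import parameterized
from transformers.testing_utils import require_torch_multi_gpu


@require_torch_multi_gpu
def test_example_with_multi_gpu():
    # Only reached when at least two CUDA devices are visible.
    assert torch.cuda.device_count() >= 2


class ExampleIntegrationTest(unittest.TestCase):
    @parameterized.expand([("fp32",), ("fp16",)])
    @require_torch_multi_gpu
    def test_integration_foo(self, precision):
        self.assertIn(precision, ("fp32", "fp16"))
```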

View File

@@ -4,7 +4,7 @@ import sys
 from unittest.mock import patch
 import run_glue_with_pabee
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me
 logging.basicConfig(level=logging.DEBUG)
@@ -20,7 +20,7 @@ def get_setup_file():
 class PabeeTests(TestCasePlus):
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_glue(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)

View File

@@ -5,7 +5,7 @@ import unittest
 from unittest.mock import patch
 import run_glue_deebert
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow
 logging.basicConfig(level=logging.DEBUG)
@@ -26,7 +26,7 @@ class DeeBertTests(unittest.TestCase):
 logger.addHandler(stream_handler)
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_glue_deebert_train(self):
 train_args = """

View File

@@ -16,7 +16,7 @@ from transformers.configuration_dpr import DPRConfig
 from transformers.configuration_rag import RagConfig
 from transformers.file_utils import is_datasets_available, is_faiss_available, is_psutil_available, is_torch_available
 from transformers.retrieval_rag import CustomHFIndex
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me
 from transformers.tokenization_bart import BartTokenizer
 from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
 from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
@@ -179,7 +179,7 @@ class RagRetrieverTest(TestCase):
 retriever.init_retrieval(port)
 return retriever
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_pytorch_distributed_retriever_retrieve(self):
 n_docs = 1
 retriever = self.get_dummy_pytorch_distributed_retriever(init_retrieval=True)
@@ -195,7 +195,7 @@ class RagRetrieverTest(TestCase):
 self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc
 self.assertListEqual(doc_ids.tolist(), [[1], [0]])
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_custom_hf_index_retriever_retrieve(self):
 n_docs = 1
 retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=False)
@@ -211,7 +211,7 @@ class RagRetrieverTest(TestCase):
 self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc
 self.assertListEqual(doc_ids.tolist(), [[1], [0]])
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self):
 n_docs = 1
 retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=True)

View File

@@ -13,7 +13,7 @@ from distillation import BartSummarizationDistiller, distill_main
 from finetune import SummarizationModule, main
 from transformers import MarianMTModel
 from transformers.file_utils import cached_path
-from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multi_gpu_but_fix_me, slow
 from utils import load_json
@@ -32,7 +32,7 @@ class TestMbartCc25Enro(TestCasePlus):
 @slow
 @require_torch_gpu
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_model_download(self):
 """This warms up the cache so that we can time the next test without including download time, which varies between machines."""
 MarianMTModel.from_pretrained(MARIAN_MODEL)
@@ -40,7 +40,7 @@ class TestMbartCc25Enro(TestCasePlus):
 # @timeout_decorator.timeout(1200)
 @slow
 @require_torch_gpu
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_train_mbart_cc25_enro_script(self):
 env_vars_to_replace = {
 "$MAX_LEN": 64,
@@ -75,7 +75,7 @@ class TestMbartCc25Enro(TestCasePlus):
 --num_sanity_val_steps 0
 --eval_beams 2
 """.split()
-# XXX: args.gpus > 1 : handle multigpu in the future
+# XXX: args.gpus > 1 : handle multi_gpu in the future
 testargs = ["finetune.py"] + bash_script.split() + args
 with patch.object(sys, "argv", testargs):
@@ -129,7 +129,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
 @timeout_decorator.timeout(600)
 @slow
 @require_torch_gpu
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_opus_mt_distill_script(self):
 data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
 env_vars_to_replace = {
@@ -172,7 +172,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
 parser = pl.Trainer.add_argparse_args(parser)
 parser = BartSummarizationDistiller.add_model_specific_args(parser, os.getcwd())
 args = parser.parse_args()
-# assert args.gpus == gpus THIS BREAKS for multigpu
+# assert args.gpus == gpus THIS BREAKS for multi_gpu
 model = distill_main(args)

View File

@@ -11,7 +11,7 @@ from save_len_file import save_len_file
 from test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir
 from transformers import AutoTokenizer
 from transformers.modeling_bart import shift_tokens_right
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow
 from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset
@@ -30,7 +30,7 @@ class TestAll(TestCasePlus):
 ],
 )
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_seq2seq_dataset_truncation(self, tok_name):
 tokenizer = AutoTokenizer.from_pretrained(tok_name)
 tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -70,7 +70,7 @@ class TestAll(TestCasePlus):
 break # No need to test every batch
 @parameterized.expand([BART_TINY, BERT_BASE_CASED])
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_legacy_dataset_truncation(self, tok):
 tokenizer = AutoTokenizer.from_pretrained(tok)
 tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -95,7 +95,7 @@ class TestAll(TestCasePlus):
 assert max_len_target > trunc_target # Truncated
 break # No need to test every batch
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_pack_dataset(self):
 tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")
@@ -114,7 +114,7 @@ class TestAll(TestCasePlus):
 assert orig_paths == new_paths
 @pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq")
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_dynamic_batch_size(self):
 if not FAIRSEQ_AVAILABLE:
 return
@@ -139,7 +139,7 @@ class TestAll(TestCasePlus):
 if failures:
 raise AssertionError(f"too many tokens in {len(failures)} batches")
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_sortish_sampler_reduces_padding(self):
 ds, _, tokenizer = self._get_dataset(max_len=512)
 bs = 2
@@ -179,7 +179,7 @@ class TestAll(TestCasePlus):
 )
 return ds, max_tokens, tokenizer
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_distributed_sortish_sampler_splits_indices_between_procs(self):
 ds, max_tokens, tokenizer = self._get_dataset()
 ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False))
@@ -195,7 +195,7 @@ class TestAll(TestCasePlus):
 PEGASUS_XSUM,
 ],
 )
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_dataset_kwargs(self, tok_name):
 tokenizer = AutoTokenizer.from_pretrained(tok_name)
 if tok_name == MBART_TINY:

View File

@@ -22,7 +22,7 @@ from transformers import FSMTForConditionalGeneration, FSMTTokenizer
 from transformers.testing_utils import (
 get_tests_dir,
 require_torch,
-require_torch_non_multigpu_but_fix_me,
+require_torch_non_multi_gpu_but_fix_me,
 slow,
 torch_device,
 )
@@ -54,7 +54,7 @@ class ModelEvalTester(unittest.TestCase):
 ]
 )
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_bleu_scores(self, pair, min_bleu_score):
 # note: this test is not testing the best performance since it only evals a small batch
 # but it should be enough to detect a regression in the output quality

View File

@@ -4,7 +4,7 @@ import unittest
 from make_student import create_student_by_copying_alternating_layers
 from transformers import AutoConfig
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch, require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import require_torch, require_torch_non_multi_gpu_but_fix_me
 TINY_BART = "sshleifer/bart-tiny-random"
@@ -17,28 +17,28 @@ class MakeStudentTester(unittest.TestCase):
 def teacher_config(self):
 return AutoConfig.from_pretrained(TINY_BART)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_valid_t5(self):
 student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1)
 self.assertEqual(student.config.num_hidden_layers, 1)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_asymmetric_t5(self):
 student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_same_decoder_small_encoder(self):
 student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None)
 self.assertEqual(student.config.encoder_layers, 1)
 self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_small_enc_small_dec(self):
 student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1)
 self.assertEqual(student.config.encoder_layers, 1)
 self.assertEqual(student.config.decoder_layers, 1)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_raises_assert(self):
 with self.assertRaises(AssertionError):
 create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None)

View File

@@ -24,7 +24,7 @@ from transformers.testing_utils import (
 CaptureStdout,
 TestCasePlus,
 require_torch_gpu,
-require_torch_non_multigpu_but_fix_me,
+require_torch_non_multi_gpu_but_fix_me,
 slow,
 )
 from utils import ROUGE_KEYS, label_smoothed_nll_loss, lmap, load_json
@@ -133,7 +133,7 @@ class TestSummarizationDistiller(TestCasePlus):
 @slow
 @require_torch_gpu
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_hub_configs(self):
 """I put require_torch_gpu cause I only want this to run with self-scheduled."""
@@ -151,12 +151,12 @@ class TestSummarizationDistiller(TestCasePlus):
 failures.append(m)
 assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_distill_no_teacher(self):
 updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True)
 self._test_distiller_cli(updates)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_distill_checkpointing_with_teacher(self):
 updates = dict(
 student_encoder_layers=2,
@@ -181,7 +181,7 @@ class TestSummarizationDistiller(TestCasePlus):
 convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
 assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_loss_fn(self):
 model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
 input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
@@ -202,7 +202,7 @@ class TestSummarizationDistiller(TestCasePlus):
 # TODO: understand why this breaks
 self.assertEqual(nll_loss, model_computed_loss)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_distill_mbart(self):
 updates = dict(
 student_encoder_layers=2,
@@ -227,7 +227,7 @@ class TestSummarizationDistiller(TestCasePlus):
 assert len(all_files) > 2
 self.assertEqual(len(transformer_ckpts), 2)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_distill_t5(self):
 updates = dict(
 student_encoder_layers=1,
@@ -309,21 +309,21 @@ class TestTheRest(TestCasePlus):
 # test one model to quickly (no-@slow) catch simple problems and do an
 # extensive testing of functionality with multiple models as @slow separately
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_eval(self):
 self.run_eval_tester(T5_TINY)
 # any extra models should go into the list here - can be slow
 @parameterized.expand([BART_TINY, MBART_TINY])
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_eval_slow(self, model):
 self.run_eval_tester(model)
 # testing with 2 models to validate: 1. translation (t5) 2. summarization (mbart)
 @parameterized.expand([T5_TINY, MBART_TINY])
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_eval_search(self, model):
 input_file_name = Path(self.get_auto_remove_tmp_dir()) / "utest_input.source"
 output_file_name = input_file_name.parent / "utest_output.txt"
@@ -374,7 +374,7 @@ class TestTheRest(TestCasePlus):
 @parameterized.expand(
 [T5_TINY, BART_TINY, MBART_TINY, MARIAN_TINY, FSMT_TINY],
 )
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_finetune(self, model):
 args_d: dict = CHEAP_ARGS.copy()
 task = "translation" if model in [MBART_TINY, MARIAN_TINY, FSMT_TINY] else "summarization"
@@ -426,7 +426,7 @@ class TestTheRest(TestCasePlus):
 assert isinstance(example_batch, dict)
 assert len(example_batch) >= 4
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_finetune_extra_model_args(self):
 args_d: dict = CHEAP_ARGS.copy()
@@ -477,7 +477,7 @@ class TestTheRest(TestCasePlus):
 model = main(args)
 assert str(excinfo.value) == f"model config doesn't have a `{unsupported_param}` attribute"
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_finetune_lr_schedulers(self):
 args_d: dict = CHEAP_ARGS.copy()

View File

@@ -8,7 +8,7 @@ from transformers.testing_utils import (
 execute_subprocess_async,
 get_gpu_count,
 require_torch_gpu,
-require_torch_multigpu,
+require_torch_multi_gpu,
 slow,
 )
@@ -21,8 +21,8 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus):
 def setUpClass(cls):
 return cls
-@require_torch_multigpu
-def test_multigpu(self):
+@require_torch_multi_gpu
+def test_multi_gpu(self):
 updates = dict(
 no_teacher=True,

View File

@@ -4,7 +4,7 @@ import unittest
 from transformers.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow
 @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.")
@@ -15,12 +15,12 @@ class TatoebaConversionTester(unittest.TestCase):
 return TatoebaConverter(save_dir=tmp_dir)
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_resolver(self):
 self.resolver.convert_models(["heb-eng"])
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_model_card(self):
 content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True)
 assert mmeta["long_pair"] == "heb-eng"

View File

@@ -23,7 +23,7 @@ from unittest.mock import patch
 import torch
 from transformers.file_utils import is_apex_available
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, torch_device
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, torch_device
 SRC_DIRS = [
@@ -67,7 +67,7 @@ def is_cuda_and_apex_available():
 class ExamplesTests(TestCasePlus):
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_glue(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -100,7 +100,7 @@ class ExamplesTests(TestCasePlus):
 for value in result.values():
 self.assertGreaterEqual(value, 0.75)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_pl_glue(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -138,7 +138,7 @@ class ExamplesTests(TestCasePlus):
 # self.assertGreaterEqual(v, 0.75, f"({k})")
 #
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_clm(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -170,7 +170,7 @@ class ExamplesTests(TestCasePlus):
 result = run_clm.main()
 self.assertLess(result["perplexity"], 100)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_mlm(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -196,7 +196,7 @@ class ExamplesTests(TestCasePlus):
 result = run_mlm.main()
 self.assertLess(result["perplexity"], 42)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_ner(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -227,7 +227,7 @@ class ExamplesTests(TestCasePlus):
 self.assertGreaterEqual(result["eval_precision"], 0.75)
 self.assertLess(result["eval_loss"], 0.5)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_squad(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -256,7 +256,7 @@ class ExamplesTests(TestCasePlus):
 self.assertGreaterEqual(result["f1"], 25)
 self.assertGreaterEqual(result["exact"], 21)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_generation(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)

View File

@@ -20,7 +20,7 @@ import unittest
 from time import time
 from unittest.mock import patch
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, require_torch_tpu
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, require_torch_tpu
 logging.basicConfig(level=logging.DEBUG)
@@ -30,7 +30,7 @@ logger = logging.getLogger()
 @require_torch_tpu
 class TorchXLAExamplesTests(unittest.TestCase):
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_glue(self):
 import xla_spawn
@@ -82,7 +82,7 @@ class TorchXLAExamplesTests(unittest.TestCase):
 # Assert that the script takes less than 300 seconds to make sure it doesn't hang.
 self.assertLess(end - start, 500)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_trainer_tpu(self):
 import xla_spawn

View File

@@ -4,7 +4,7 @@ import unittest
 from unittest.mock import patch
 import run_ner_old as run_ner
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow
 logging.basicConfig(level=logging.INFO)
@@ -14,7 +14,7 @@ logger = logging.getLogger()
 class ExamplesTests(unittest.TestCase):
 @slow
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_ner(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)
@@ -35,7 +35,7 @@ class ExamplesTests(unittest.TestCase):
 result = run_ner.main()
 self.assertLess(result["eval_loss"], 1.5)
-@require_torch_non_multigpu_but_fix_me
+@require_torch_non_multi_gpu_but_fix_me
 def test_run_ner_pl(self):
 stream_handler = logging.StreamHandler(sys.stdout)
 logger.addHandler(stream_handler)

View File

@@ -193,13 +193,13 @@ def require_tokenizers(test_case):
 return test_case
-def require_torch_multigpu(test_case):
+def require_torch_multi_gpu(test_case):
 """
 Decorator marking a test that requires a multi-GPU setup (in PyTorch).
 These tests are skipped on a machine without multiple GPUs.
-To run *only* the multigpu tests, assuming all test names contain multigpu: $ pytest -sv ./tests -k "multigpu"
+To run *only* the multi_gpu tests, assuming all test names contain multi_gpu: $ pytest -sv ./tests -k "multi_gpu"
 """
 if not _torch_available:
 return unittest.skip("test requires PyTorch")(test_case)
@@ -212,7 +212,7 @@ def require_torch_multigpu(test_case):
 return test_case
-def require_torch_non_multigpu(test_case):
+def require_torch_non_multi_gpu(test_case):
 """
 Decorator marking a test that requires 0 or 1 GPU setup (in PyTorch).
 """
@@ -227,10 +227,10 @@ def require_torch_non_multigpu(test_case):
 return test_case
-# this is a decorator identical to require_torch_non_multigpu, but is used as a quick band-aid to
+# this is a decorator identical to require_torch_non_multi_gpu, but is used as a quick band-aid to
 # allow all of examples to be run multi-gpu CI and it reminds us that tests decorated with this one
 # need to be ported and aren't so by design.
-require_torch_non_multigpu_but_fix_me = require_torch_non_multigpu
+require_torch_non_multi_gpu_but_fix_me = require_torch_non_multi_gpu
 def require_torch_tpu(test_case):
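
For readers who don't want to open testing_utils.py, the skip pattern visible in this hunk boils down to the following simplified sketch (an approximation, not the exact upstream implementation):

```python
# Approximation of the decorators touched in this hunk; the real transformers
# implementation uses its own availability flags, so treat this as a sketch.
import unittest

try:
    import torch

    _torch_available = True
except ImportError:
    _torch_available = False


def require_torch_multi_gpu(test_case):
    """Skip unless PyTorch is installed and at least 2 GPUs are visible."""
    if not _torch_available:
        return unittest.skip("test requires PyTorch")(test_case)
    if torch.cuda.device_count() < 2:
        return unittest.skip("test requires multiple GPUs")(test_case)
    return test_case


def require_torch_non_multi_gpu(test_case):
    """Skip unless PyTorch is installed and at most 1 GPU is visible."""
    if not _torch_available:
        return unittest.skip("test requires PyTorch")(test_case)
    if torch.cuda.device_count() > 1:
        return unittest.skip("test requires 0 or 1 GPU")(test_case)
    return test_case


# The band-aid alias renamed by this commit is just another name for the
# non-multi-GPU decorator.
require_torch_non_multi_gpu_but_fix_me = require_torch_non_multi_gpu
```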

View File

@@ -23,7 +23,7 @@ from typing import List, Tuple
 from transformers import is_torch_available
 from transformers.file_utils import WEIGHTS_NAME
-from transformers.testing_utils import require_torch, require_torch_multigpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device
 if is_torch_available():
@@ -928,8 +928,8 @@ class ModelTesterMixin:
 with torch.no_grad():
 model(**inputs)[0]
-@require_torch_multigpu
-def test_multigpu_data_parallel_forward(self):
+@require_torch_multi_gpu
+def test_multi_gpu_data_parallel_forward(self):
 config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 # some params shouldn't be scattered by nn.DataParallel
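
The renamed test_multi_gpu_data_parallel_forward wraps a model in torch.nn.DataParallel and runs a forward pass; a stripped-down, illustrative version of that pattern (with a toy model instead of the mixin's real configs and inputs) looks roughly like this:

```python
# Illustrative only: a toy stand-in for the ModelTesterMixin test renamed in
# this hunk. The real test builds model-specific configs/inputs and filters
# out parameters that nn.DataParallel should not scatter.
import unittest

import torch
from transformers.testing_utils import require_torch_multi_gpu


class ToyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 2)

    def forward(self, inputs):
        return self.linear(inputs)


class DataParallelForwardTest(unittest.TestCase):
    @require_torch_multi_gpu
    def test_multi_gpu_data_parallel_forward(self):
        # Replicate the module across all visible GPUs and scatter the batch.
        model = torch.nn.DataParallel(ToyModel().to("cuda"))
        inputs = torch.randn(4, 8, device="cuda")
        with torch.no_grad():
            outputs = model(inputs)
        self.assertEqual(tuple(outputs.shape), (4, 2))
```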

View File

@@ -29,7 +29,7 @@ from transformers.testing_utils import (
 require_sentencepiece,
 require_tokenizers,
 require_torch,
-require_torch_non_multigpu,
+require_torch_non_multi_gpu,
 slow,
 torch_device,
 )
@@ -581,7 +581,7 @@ class RagDPRT5Test(RagTestMixin, unittest.TestCase):
 @require_retrieval
 @require_sentencepiece
 @require_tokenizers
-@require_torch_non_multigpu
+@require_torch_non_multi_gpu
 class RagModelIntegrationTests(unittest.TestCase):
 @cached_property
 def sequence_model(self):

View File

@@ -20,7 +20,7 @@ from transformers.testing_utils import (
 require_sentencepiece,
 require_tokenizers,
 require_torch,
-require_torch_multigpu,
+require_torch_multi_gpu,
 slow,
 torch_device,
 )
@@ -562,8 +562,8 @@ class ReformerTesterMixin:
 config_and_inputs = self.model_tester.prepare_config_and_inputs()
 self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)
-@require_torch_multigpu
-def test_multigpu_data_parallel_forward(self):
+@require_torch_multi_gpu
+def test_multi_gpu_data_parallel_forward(self):
 # Opt-out of this test.
 pass

View File

@@ -17,7 +17,7 @@ import random
 import unittest
 from transformers import is_torch_available
-from transformers.testing_utils import require_torch, require_torch_multigpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device
 from .test_configuration_common import ConfigTester
 from .test_generation_utils import GenerationTesterMixin
@@ -205,8 +205,8 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
 output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
 self.model_tester.check_transfo_xl_lm_head_output(output_result)
-@require_torch_multigpu
-def test_multigpu_data_parallel_forward(self):
+@require_torch_multi_gpu
+def test_multi_gpu_data_parallel_forward(self):
 # Opt-out of this test.
 pass

View File

@@ -2,7 +2,7 @@ import sys
 from typing import Dict
 from transformers import EvalPrediction, HfArgumentParser, TrainingArguments, is_torch_available
-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multigpu
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multi_gpu
 from transformers.utils import logging
@@ -44,7 +44,7 @@ if is_torch_available():
 class TestTrainerDistributed(TestCasePlus):
-@require_torch_multigpu
+@require_torch_multi_gpu
 def test_trainer(self):
 distributed_args = f"""
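
The truncated distributed_args f-string above builds a torch.distributed.launch command that re-runs the test file with one process per GPU. A hedged sketch of that launch pattern (class name and arguments are simplified, not copied from the real test) is:

```python
# Hedged sketch of the launch pattern in this test file; the real test's
# arguments are not reproduced here. The test file re-launches itself once
# per visible GPU under torch.distributed.launch.
import sys

import torch
from transformers.testing_utils import (
    TestCasePlus,
    execute_subprocess_async,
    require_torch_multi_gpu,
)


class TrainerDistributedLaunchSketch(TestCasePlus):
    @require_torch_multi_gpu
    def test_trainer(self):
        distributed_args = f"""
            -m torch.distributed.launch
            --nproc_per_node={torch.cuda.device_count()}
            {self.test_file_dir_str}/test_trainer_distributed.py
        """.split()
        # Run the script under torch.distributed.launch and wait for it to finish.
        execute_subprocess_async([sys.executable] + distributed_args)
```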