From d1fd64e7aa40d6a3c69cb21f7fd411a2a3141e04 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 25 Nov 2021 00:15:35 -0800 Subject: [PATCH] clear ~/.cache/torch_extensions between builds (#14520) --- .github/workflows/self-nightly-scheduled.yml | 7 ++++--- .github/workflows/self-push.yml | 13 +++++++------ .github/workflows/self-scheduled.yml | 1 + 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml index 6f76e9e8a39..93e9e317a0c 100644 --- a/.github/workflows/self-nightly-scheduled.yml +++ b/.github/workflows/self-nightly-scheduled.yml @@ -205,8 +205,9 @@ jobs: apt -y update && apt install -y libaio-dev pip install --upgrade pip pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html -U - pip install .[testing,deepspeed,fairscale] - pip install git+https://github.com/microsoft/DeepSpeed + rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds + pip install .[testing,fairscale] + pip install git+https://github.com/microsoft/DeepSpeed # testing bleeding edge - name: Are GPUs recognized by our DL frameworks run: | @@ -218,7 +219,7 @@ jobs: - name: Run all tests on GPU run: | python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_cuda_extensions_multi_gpu tests/deepspeed tests/extended - + - name: Failure short reports if: ${{ always() }} run: cat reports/tests_torch_cuda_extensions_multi_gpu_failures_short.txt diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 57473f45b0e..5d4218787f0 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -50,7 +50,7 @@ jobs: python -c "import torch; print('Cuda version:', torch.version.cuda)" python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())" python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - + - name: Fetch the tests to run run: | python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt @@ -105,7 +105,7 @@ jobs: run: | python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)" python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))" - + - name: Fetch the tests to run run: | python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt @@ -203,7 +203,7 @@ jobs: apt install -y libsndfile1-dev pip install --upgrade pip pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm] - + - name: Launcher docker uses: actions/checkout@v2 with: @@ -277,7 +277,7 @@ jobs: # run: | # python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)" # python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))" -# +# # - name: Fetch the tests to run # run: | # python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt @@ -389,11 +389,11 @@ jobs: python -c "import torch; print('Cuda version:', torch.version.cuda)" python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())" python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - + - name: Fetch the tests to run run: | python utils/tests_fetcher.py --diff_with_last_commit --filters tests/deepspeed tests/extended | tee test_preparation.txt - + - name: Report fetched tests uses: actions/upload-artifact@v2 with: @@ -437,6 +437,7 @@ jobs: run: | apt -y update && apt install -y libaio-dev pip install --upgrade pip + rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds pip install .[testing,deepspeed,fairscale] - name: Are GPUs recognized by our DL frameworks diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 1d17d96ce35..f6b3a617589 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -429,6 +429,7 @@ jobs: run: | apt -y update && apt install -y libaio-dev pip install --upgrade pip + rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds pip install .[testing,deepspeed,fairscale] - name: Are GPUs recognized by our DL frameworks