From d1fd64e7aa40d6a3c69cb21f7fd411a2a3141e04 Mon Sep 17 00:00:00 2001
From: Stas Bekman <stas00@users.noreply.github.com>
Date: Thu, 25 Nov 2021 00:15:35 -0800
Subject: [PATCH] clear ~/.cache/torch_extensions between builds (#14520)

---
 .github/workflows/self-nightly-scheduled.yml |  7 ++++---
 .github/workflows/self-push.yml              | 13 +++++++------
 .github/workflows/self-scheduled.yml         |  1 +
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml
index 6f76e9e8a39..93e9e317a0c 100644
--- a/.github/workflows/self-nightly-scheduled.yml
+++ b/.github/workflows/self-nightly-scheduled.yml
@@ -205,8 +205,9 @@ jobs:
                   apt -y update && apt install -y libaio-dev
                   pip install --upgrade pip
                   pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html -U
-                  pip install .[testing,deepspeed,fairscale]
-                  pip install git+https://github.com/microsoft/DeepSpeed
+                  rm -rf ~/.cache/torch_extensions/ # cache is shared between conflicting builds, so wipe it first
+                  pip install .[testing,fairscale]
+                  pip install git+https://github.com/microsoft/DeepSpeed # install from git to test DeepSpeed's bleeding edge
 
             - name: Are GPUs recognized by our DL frameworks
               run: |
@@ -218,7 +219,7 @@ jobs:
             - name: Run all tests on GPU
               run: |
                   python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_cuda_extensions_multi_gpu tests/deepspeed tests/extended
- 
+
             - name: Failure short reports
               if: ${{ always() }}
               run: cat reports/tests_torch_cuda_extensions_multi_gpu_failures_short.txt
diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index 57473f45b0e..5d4218787f0 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -50,7 +50,7 @@ jobs:
           python -c "import torch; print('Cuda version:', torch.version.cuda)"
           python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
           python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
-      
+
       - name: Fetch the tests to run
         run: |
           python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
@@ -105,7 +105,7 @@ jobs:
         run: |
           python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
           python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
-      
+
       - name: Fetch the tests to run
         run: |
           python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
@@ -203,7 +203,7 @@ jobs:
           apt install -y libsndfile1-dev
           pip install --upgrade pip
           pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
-      
+
       - name: Launcher docker
         uses: actions/checkout@v2
         with:
@@ -277,7 +277,7 @@ jobs:
 #        run: |
 #          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
 #          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
-#      
+#
 #      - name: Fetch the tests to run
 #        run: |
 #          python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
@@ -389,11 +389,11 @@ jobs:
           python -c "import torch; print('Cuda version:', torch.version.cuda)"
           python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
           python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
-      
+
       - name: Fetch the tests to run
         run: |
           python utils/tests_fetcher.py --diff_with_last_commit --filters tests/deepspeed tests/extended | tee test_preparation.txt
-      
+
       - name: Report fetched tests
         uses: actions/upload-artifact@v2
         with:
@@ -437,6 +437,7 @@ jobs:
         run: |
           apt -y update && apt install -y libaio-dev
           pip install --upgrade pip
+          rm -rf ~/.cache/torch_extensions/ # cache is shared between conflicting builds, so wipe it first
           pip install .[testing,deepspeed,fairscale]
 
       - name: Are GPUs recognized by our DL frameworks
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 1d17d96ce35..f6b3a617589 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -429,6 +429,7 @@ jobs:
         run: |
           apt -y update && apt install -y libaio-dev
           pip install --upgrade pip
+          rm -rf ~/.cache/torch_extensions/ # cache is shared between conflicting builds, so wipe it first
           pip install .[testing,deepspeed,fairscale]
 
       - name: Are GPUs recognized by our DL frameworks