diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml index 78b14463b31..1cdbd6982b7 100644 --- a/.github/workflows/self-nightly-scheduled.yml +++ b/.github/workflows/self-nightly-scheduled.yml @@ -179,6 +179,9 @@ jobs: working-directory: /workspace/transformers run: git fetch && git checkout ${{ github.sha }} + - name: Remove cached torch extensions + run: rm -rf /github/home/.cache/torch_extensions/ + # To avoid unknown test failures - name: Pre build DeepSpeed *again* working-directory: /workspace @@ -186,7 +189,7 @@ jobs: python3 -m pip uninstall -y deepspeed rm -rf DeepSpeed git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build - DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - name: NVIDIA-SMI run: | diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index dd1222609d8..f2f88ad1b9a 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -343,12 +343,15 @@ jobs: git checkout ${{ env.CI_SHA }} echo "log = $(git log -n 1)" + - name: Remove cached torch extensions + run: rm -rf /github/home/.cache/torch_extensions/ + # To avoid unknown test failures - name: Pre build DeepSpeed *again* working-directory: /workspace run: | python3 -m pip uninstall -y deepspeed - DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - name: NVIDIA-SMI run: | @@ -422,12 +425,15 @@ jobs: git checkout ${{ env.CI_SHA }} echo "log = $(git log -n 1)" + - name: Remove cached torch extensions + run: rm -rf /github/home/.cache/torch_extensions/ + # To avoid unknown test failures - name: Pre build DeepSpeed *again* working-directory: /workspace run: | python3 -m pip uninstall -y deepspeed - DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - name: NVIDIA-SMI run: | diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 4d8feba32a5..34380faa4ec 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -306,12 +306,15 @@ jobs: working-directory: /workspace/transformers run: git fetch && git checkout ${{ github.sha }} + - name: Remove cached torch extensions + run: rm -rf /github/home/.cache/torch_extensions/ + # To avoid unknown test failures - name: Pre build DeepSpeed *again* working-directory: /workspace run: | python3 -m pip uninstall -y deepspeed - DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - name: NVIDIA-SMI run: | diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile index 2b3292f350d..f5e29175b41 100644 --- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -26,7 +26,7 @@ RUN python3 -m pip uninstall -y deepspeed # This has to be run (again) inside the GPU VMs running the tests. # The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests. # TODO: Find out why test fail. -RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 +RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 # When installing in editable mode, `transformers` is not recognized as a package. # this line must be added in order for python to be aware of transformers. diff --git a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile index 3f880dd95dc..1854d9f4b38 100644 --- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile @@ -25,7 +25,7 @@ RUN python3 -m pip uninstall -y deepspeed # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.) # Issue: https://github.com/microsoft/DeepSpeed/issues/2010 # RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \ -# DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 +# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 # When installing in editable mode, `transformers` is not recognized as a package. # this line must be added in order for python to be aware of transformers.