diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 165af806730..c9b63233d0c 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -198,19 +198,12 @@ jobs: machine_type: [single-gpu] runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}'] container: - image: nvcr.io/nvidia/pytorch:21.03-py3 + image: huggingface/transformers-pytorch-deepspeed-latest-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - - name: Checkout transformers - uses: actions/checkout@v2 - with: - fetch-depth: 2 - - - name: Install dependencies - run: | - apt -y update && apt install -y libaio-dev - pip install --upgrade pip - pip install .[deepspeed-testing] + - name: Update clone + working-directory: /workspace/transformers + run: git fetch && git checkout ${{ github.sha }} - name: NVIDIA-SMI run: | @@ -247,30 +240,24 @@ jobs: machine_type: [multi-gpu] runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}'] container: - image: nvcr.io/nvidia/pytorch:21.03-py3 + image: huggingface/transformers-pytorch-deepspeed-latest-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - - name: Checkout transformers - uses: actions/checkout@v2 - with: - fetch-depth: 2 - - - name: Install dependencies - run: | - apt -y update && apt install -y libaio-dev - pip install --upgrade pip - rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds - pip install .[testing,deepspeed,fairscale] + - name: Update clone + working-directory: /workspace/transformers + run: git fetch && git checkout ${{ github.sha }} - name: NVIDIA-SMI run: | nvidia-smi - name: Environment + working-directory: /workspace/transformers run: | python utils/print_env.py - name: Run all non-slow selected tests on GPU + working-directory: /workspace/transformers # TODO: Here we pass all tests in the 2 folders for simplicity. 
It's better to pass only the identified tests. run: | python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended @@ -278,14 +265,14 @@ jobs: - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - name: Test suite reports artifacts if: ${{ always() }} uses: actions/upload-artifact@v2 with: name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports - path: reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu send_results: name: Send results to webhook diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 8f378a6618c..b1f49ee190e 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -306,14 +306,6 @@ jobs: working-directory: /workspace/transformers run: git fetch && git checkout ${{ github.sha }} - - name: Re-compile DeepSpeed - working-directory: /workspace - run: | - pip install deepspeed # installs the deps correctly - rm -rf DeepSpeed - git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build - DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install -e . 
--global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - - name: NVIDIA-SMI run: | nvidia-smi diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index f18d81cb54a..8d63921ec02 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -11,9 +11,13 @@ ARG REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] -RUN python3 -m pip install --no-cache-dir -U torch tensorflow +RUN python3 -m pip install --no-cache-dir -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 +RUN python3 -m pip install --no-cache-dir -U tensorflow RUN python3 -m pip uninstall -y flax jax -RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu102.html + +RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu113.html +RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cpu -f https://software.intel.com/ipex-whl-stable + RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip RUN python3 -m pip install -U "itsdangerous<2.1.0" diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile index 1dd080c319b..3819340ec5a 100644 --- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -11,8 +11,8 @@ ARG 
REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF RUN python3 -m pip install --no-cache-dir -e ./transformers[deepspeed-testing] -RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \ - DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install -e . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 +# Install latest release PyTorch +RUN python3 -m pip install --no-cache-dir -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 # When installing in editable mode, `transformers` is not recognized as a package. # this line must be added in order for python to be aware of transformers. diff --git a/docker/transformers-pytorch-gpu/Dockerfile b/docker/transformers-pytorch-gpu/Dockerfile index d1828190b14..87b133b34c5 100644 --- a/docker/transformers-pytorch-gpu/Dockerfile +++ b/docker/transformers-pytorch-gpu/Dockerfile @@ -13,11 +13,16 @@ RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing] # If set to nothing, will install the latest version ARG PYTORCH='' +ARG TORCH_VISION='' +ARG TORCH_AUDIO='' + +RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/cu113 +RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='$TORCH_VISION'.*' || VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/cu113 +RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='$TORCH_AUDIO'.*' || VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/cu113 -RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION RUN python3 -m pip uninstall -y tensorflow 
flax -RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu102.html +RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+cu113.html RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip RUN python3 -m pip install -U "itsdangerous<2.1.0"