Patch summary (free text before the first "diff --git" header; patch tools skip it).
  * build-docker-images.yml: the "Latest PyTorch + DeepSpeed" job moves from runner group
    aws-general-8-plus to aws-g4dn-2xlarge-cache. The Slack `title:` lines and the bare `-`
    step markers are re-emitted; their removed/added text is otherwise identical, so these
    appear to be whitespace-only clean-ups. One title is also corrected from
    "transformers-pytorch-gpudocker build" to "transformers-pytorch-gpu docker build".
  * build-nightly-ci-docker-images.yml: the "Nightly PyTorch + DeepSpeed" job gets the same
    runner-group change.
  * Both DeepSpeed Dockerfiles: base image nvcr.io/nvidia/pytorch 23.11-py3 -> 24.08-py3,
    CUDA wheel tag cu121 -> cu126, and `pandas<2` / `numpy<2` pinned next to
    transformers[deepspeed-testing]. The "latest" image also bumps PYTORCH 2.2.0 -> 2.6.0.
    The release-notes link comment is updated to rel-24-08 in both files so it matches FROM.
diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index c21faf2d747..a51b1f9f154 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -63,14 +63,14 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build + title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} latest-torch-deepspeed-docker: name: "Latest PyTorch + DeepSpeed" runs-on: - group: aws-general-8-plus + group: aws-g4dn-2xlarge-cache steps: - name: Set up Docker Buildx @@ -99,7 +99,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}} - title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build + title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -140,7 +140,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build + title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -176,7 +176,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the huggingface/transformers-doc-builder docker build + title: 🤗 Results of the huggingface/transformers-doc-builder docker build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -214,7 +214,7 @@ jobs: uses: 
huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build + title: 🤗 Results of the huggingface/transformers-pytorch-gpu docker build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -223,19 +223,19 @@ jobs: runs-on: group: aws-general-8-plus steps: - - + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - + - name: Check out code uses: actions/checkout@v4 - - + - name: Login to DockerHub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} - - + - name: Build and push uses: docker/build-push-action@v5 with: @@ -263,7 +263,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build + title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -301,7 +301,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build + title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -310,19 +310,19 @@ jobs: runs-on: group: aws-general-8-plus steps: - - + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - + - name: Check out code uses: actions/checkout@v4 - - + - name: Login to DockerHub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PASSWORD }} - - + - name: Build and push uses: docker/build-push-action@v5 with: @@ -350,7 +350,7 @@ jobs: uses: 
huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build + title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -388,6 +388,6 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }} - title: 🤗 Results of the transformers-quantization-latest-gpu build + title: 🤗 Results of the transformers-quantization-latest-gpu build status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml index 4b00a6d3fae..dfab083503c 100644 --- a/.github/workflows/build-nightly-ci-docker-images.yml +++ b/.github/workflows/build-nightly-ci-docker-images.yml @@ -42,7 +42,7 @@ jobs: nightly-torch-deepspeed-docker: name: "Nightly PyTorch + DeepSpeed" runs-on: - group: aws-general-8-plus + group: aws-g4dn-2xlarge-cache steps: - name: Set up Docker Buildx diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile index 36f8506a476..45aa89fefb2 100644 --- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -1,12 +1,12 @@ -# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11 -FROM nvcr.io/nvidia/pytorch:23.11-py3 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html +FROM nvcr.io/nvidia/pytorch:24.08-py3 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive -ARG PYTORCH='2.2.0' +ARG PYTORCH='2.6.0' # Example: `cu102`, `cu113`, etc. 
-ARG CUDA='cu121' +ARG CUDA='cu126' RUN apt -y update RUN apt install -y libaio-dev @@ -15,7 +15,8 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip ARG REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF -RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing] +# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors +RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2' # Install latest release PyTorch # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.) diff --git a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile index 80de73e37c4..9daa27c06e4 100644 --- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile @@ -1,11 +1,11 @@ -# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11 -FROM nvcr.io/nvidia/pytorch:23.11-py3 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html +FROM nvcr.io/nvidia/pytorch:24.08-py3 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive # Example: `cu102`, `cu113`, etc. 
-ARG CUDA='cu121' +ARG CUDA='cu126' RUN apt -y update RUN apt install -y libaio-dev @@ -21,7 +21,8 @@ RUN python3 -m pip uninstall -y torch torchvision torchaudio # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops) RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA -RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing] +# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors +RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2' RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate