diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index c21faf2d747..a51b1f9f154 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -63,14 +63,14 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build 
+          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
   latest-torch-deepspeed-docker:
     name: "Latest PyTorch + DeepSpeed"
     runs-on:
-      group: aws-general-8-plus
+      group: aws-g4dn-2xlarge-cache
     steps:
       -
         name: Set up Docker Buildx
@@ -99,7 +99,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build 
+          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -140,7 +140,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build 
+          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -176,7 +176,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-doc-builder docker build 
+          title: 🤗 Results of the huggingface/transformers-doc-builder docker build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -214,7 +214,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build 
+          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -223,19 +223,19 @@ jobs:
     runs-on:
       group: aws-general-8-plus
     steps:
-      - 
+      -
         name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
-      - 
+      -
         name: Check out code
         uses: actions/checkout@v4
-      - 
+      -
         name: Login to DockerHub
         uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      - 
+      -
         name: Build and push
         uses: docker/build-push-action@v5
         with:
@@ -263,7 +263,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build 
+          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -301,7 +301,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build 
+          title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -310,19 +310,19 @@ jobs:
     runs-on:
       group: aws-general-8-plus
     steps:
-      - 
+      -
         name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
-      - 
+      -
         name: Check out code
         uses: actions/checkout@v4
-      - 
+      -
         name: Login to DockerHub
         uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      - 
+      -
         name: Build and push
         uses: docker/build-push-action@v5
         with:
@@ -350,7 +350,7 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build 
+          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
@@ -388,6 +388,6 @@ jobs:
         uses: huggingface/hf-workflows/.github/actions/post-slack@main
         with:
           slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-quantization-latest-gpu build 
+          title: 🤗 Results of the transformers-quantization-latest-gpu build
           status: ${{ job.status }}
           slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml
index 4b00a6d3fae..dfab083503c 100644
--- a/.github/workflows/build-nightly-ci-docker-images.yml
+++ b/.github/workflows/build-nightly-ci-docker-images.yml
@@ -42,7 +42,7 @@ jobs:
   nightly-torch-deepspeed-docker:
     name: "Nightly PyTorch + DeepSpeed"
     runs-on:
-      group: aws-general-8-plus
+      group: aws-g4dn-2xlarge-cache
     steps:
       -
         name: Set up Docker Buildx
diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
index 36f8506a476..45aa89fefb2 100644
--- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
@@ -1,12 +1,12 @@
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
-FROM nvcr.io/nvidia/pytorch:23.11-py3
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
+FROM nvcr.io/nvidia/pytorch:24.08-py3
 LABEL maintainer="Hugging Face"
 
 ARG DEBIAN_FRONTEND=noninteractive
 
-ARG PYTORCH='2.2.0'
+ARG PYTORCH='2.6.0'
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu121'
+ARG CUDA='cu126'
 
 RUN apt -y update
 RUN apt install -y libaio-dev
@@ -15,7 +15,8 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
 ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
 
-RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
+# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
+RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
 
 # Install latest release PyTorch
 # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
diff --git a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
index 80de73e37c4..9daa27c06e4 100644
--- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
@@ -1,11 +1,11 @@
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
-FROM nvcr.io/nvidia/pytorch:23.11-py3
+FROM nvcr.io/nvidia/pytorch:24.08-py3
 LABEL maintainer="Hugging Face"
 
 ARG DEBIAN_FRONTEND=noninteractive
 
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu121'
+ARG CUDA='cu126'
 
 RUN apt -y update
 RUN apt install -y libaio-dev
@@ -21,7 +21,8 @@ RUN python3 -m pip uninstall -y torch torchvision torchaudio
 # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
 RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
 
-RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
+# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
+RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
 
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate