diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml
index 0997a1112ad..95584176d6c 100644
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@@ -18,6 +18,10 @@ on:
       docker:
         required: true
         type: string
+      report_name_prefix:
+        required: false
+        default: run_models_gpu
+        type: string
 
 env:
   HF_HOME: /mnt/cache
@@ -116,23 +120,23 @@ jobs:
 
       - name: Run all tests on GPU
         working-directory: /transformers
-        run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+        run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
 
       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+        run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt
 
       - name: Run test
         shell: bash
         run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+          mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"
 
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+      - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+          name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml
index 75ea3bb24bc..8589f4a810b 100644
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@@ -54,12 +54,23 @@ jobs:
       ci_event: Daily CI
     secrets: inherit
 
+  trainer-fsdp-ci:
+    name: Trainer/FSDP CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_trainer_and_fsdp_gpu
+      slack_report_channel: "#transformers-ci-daily-training"
+      runner: daily-ci
+      docker: huggingface/transformers-all-latest-gpu
+      ci_event: Daily CI
+    secrets: inherit
+
   deepspeed-ci:
     name: DeepSpeed CI
     uses: ./.github/workflows/self-scheduled.yml
     with:
       job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-deepspeed"
+      slack_report_channel: "#transformers-ci-daily-training"
       runner: daily-ci
       docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
       ci_event: Daily CI
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index dead87b5b6e..7fce6d60800 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -45,7 +45,7 @@ env:
 
 jobs:
   setup:
-    if: contains(fromJSON('["run_models_gpu", "run_quantization_torch_gpu"]'), inputs.job)
+    if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
     name: Setup
     strategy:
       matrix:
@@ -77,12 +77,17 @@ jobs:
         run: pip freeze
 
       - id: set-matrix
-        if: ${{ inputs.job == 'run_models_gpu' }}
+        if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
         name: Identify models to test
         working-directory: /transformers/tests
         run: |
-          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
-          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
+          if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
+            echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> "$GITHUB_OUTPUT"
+            echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> "$GITHUB_OUTPUT"
+          elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
+            echo "folder_slices=[['trainer'], ['fsdp']]" >> "$GITHUB_OUTPUT"
+            echo "slice_ids=[0, 1]" >> "$GITHUB_OUTPUT"
+          fi
 
       - id: set-matrix-quantization
         if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
@@ -113,6 +118,25 @@ jobs:
       docker: ${{ inputs.docker }}
     secrets: inherit
 
+  run_trainer_and_fsdp_gpu:
+    if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
+    name: " "
+    needs: setup
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+        slice_id: [0, 1]
+    uses: ./.github/workflows/model_jobs.yml
+    with:
+      folder_slices: ${{ needs.setup.outputs.folder_slices }}
+      machine_type: ${{ matrix.machine_type }}
+      slice_id: ${{ matrix.slice_id }}
+      runner: ${{ inputs.runner }}
+      docker: ${{ inputs.docker }}
+      report_name_prefix: run_trainer_and_fsdp_gpu
+    secrets: inherit
+
   run_pipelines_torch_gpu:
     if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
     name: PyTorch pipelines
@@ -336,10 +360,6 @@ jobs:
         working-directory: ${{ inputs.working-directory-prefix }}/transformers
         run: git fetch && git checkout ${{ github.sha }}
 
-      # TODO: update the docker image instead
-      - name: Reinstall some packages with specific versions
-        run: python3 -m pip install numpy==1.24.3 numba==0.61.0 scipy==1.12.0 scikit-learn==1.6.1
-
       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
         working-directory: ${{ inputs.working-directory-prefix }}/transformers
         run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
@@ -545,6 +565,7 @@ jobs:
     needs: [
       setup,
       run_models_gpu,
+      run_trainer_and_fsdp_gpu,
       run_pipelines_torch_gpu,
       run_pipelines_tf_gpu,
       run_examples_gpu,
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 66db34e00c2..dd01b082f4a 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -942,7 +942,6 @@ if __name__ == "__main__":
     # To find the PR number in a commit title, for example, `Add AwesomeFormer model (#99999)`
     pr_number_re = re.compile(r"\(#(\d+)\)$")
 
-    title = f"🤗 Results of {ci_event} - {os.getenv('CI_TEST_JOB')}."
     # Add Commit/PR title with a link for push CI
     # (check the title in 2 env. variables - depending on the CI is triggered via `push` or `workflow_run` event)
     ci_title_push = os.environ.get("CI_TITLE_PUSH")
@@ -994,6 +993,8 @@ if __name__ == "__main__":
     else:
         ci_title = ""
 
+    # Keep the job name here: `error_out` below uses this title; it is rebuilt before calling `Message()`.
+    title = f"🤗 Results of {ci_event} - {os.getenv('CI_TEST_JOB')}."
     if runner_not_available or runner_failed or setup_failed:
         Message.error_out(title, ci_title, runner_not_available, runner_failed, setup_failed)
         exit(0)
@@ -1041,6 +1042,11 @@ if __name__ == "__main__":
         "Unclassified",
     ]
 
+    job_name = os.getenv("CI_TEST_JOB")
+    # Mirrors the `report_name_prefix` input of model_jobs.yml (default `run_models_gpu`) used in artifact names.
+    is_trainer_fsdp_job = job_name == "run_trainer_and_fsdp_gpu"
+    report_name_prefix = job_name if is_trainer_fsdp_job else "run_models_gpu"
+
     # This dict will contain all the information relative to each model:
     # - Failures: the total, as well as the number of failures per-category defined above
     # - Success: total
@@ -1055,13 +1061,13 @@ if __name__ == "__main__":
             "job_link": {},
         }
         for model in models
-        if f"run_models_gpu_{model}_test_reports" in available_artifacts
+        if f"{report_name_prefix}_{model}_test_reports" in available_artifacts
     }
 
     unclassified_model_failures = []
 
     for model in model_results.keys():
-        for artifact_path in available_artifacts[f"run_models_gpu_{model}_test_reports"].paths:
+        for artifact_path in available_artifacts[f"{report_name_prefix}_{model}_test_reports"].paths:
             artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
             if "stats" in artifact:
                 # Link to the GitHub Action job
@@ -1123,7 +1129,7 @@ if __name__ == "__main__":
         "PyTorch pipelines": "run_pipelines_torch_gpu_test_reports",
         "TensorFlow pipelines": "run_pipelines_tf_gpu_test_reports",
         "Examples directory": "run_examples_gpu_test_reports",
-        "Torch CUDA extension tests": "run_torch_cuda_extensions_gpu_test_reports",
+        "DeepSpeed": "run_torch_cuda_extensions_gpu_test_reports",
     }
 
     if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"):
@@ -1132,7 +1138,7 @@ if __name__ == "__main__":
         del additional_files["TensorFlow pipelines"]
     elif ci_event.startswith("Scheduled CI (AMD)"):
         del additional_files["TensorFlow pipelines"]
-        del additional_files["Torch CUDA extension tests"]
+        del additional_files["DeepSpeed"]
     elif ci_event.startswith("Push CI (AMD)"):
         additional_files = {}
 
@@ -1143,12 +1149,11 @@ if __name__ == "__main__":
         "run_pipelines_torch_gpu": "PyTorch pipelines",
         "run_pipelines_tf_gpu": "TensorFlow pipelines",
         "run_examples_gpu": "Examples directory",
-        "run_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
+        "run_torch_cuda_extensions_gpu": "DeepSpeed",
     }
 
     # Remove some entries in `additional_files` if they are not concerned.
     test_name = None
-    job_name = os.getenv("CI_TEST_JOB")
     if job_name in job_to_test_map:
         test_name = job_to_test_map[job_name]
     additional_files = {k: v for k, v in additional_files.items() if k == test_name}
@@ -1243,7 +1248,7 @@ if __name__ == "__main__":
         "PyTorch pipelines": "torch_pipeline",
         "TensorFlow pipelines": "tf_pipeline",
         "Examples directory": "example",
-        "Torch CUDA extension tests": "deepspeed",
+        "DeepSpeed": "deepspeed",
     }
     for job, job_result in additional_results.items():
         with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp:
@@ -1270,6 +1275,19 @@ if __name__ == "__main__":
                 artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"]
             )
 
+    job_to_test_map.update(
+        {
+            "run_models_gpu": "Models",
+            "run_trainer_and_fsdp_gpu": "Trainer & FSDP",
+        }
+    )
+
+    # Fall back to the raw job name so an unmapped job never produces a title ending in a bare ": ".
+    ci_name_in_report = job_to_test_map.get(job_name, job_name or "")
+    title = f"🤗 Results of {ci_event}"
+    if ci_name_in_report:
+        title = f"{title}: {ci_name_in_report}"
+
     message = Message(
         title,
         ci_title,