diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 5fc037fec20..36c113190ca 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -593,8 +593,7 @@ jobs:
     secrets: inherit

   check_new_failures:
-    # TODO: work on `run_quantization_torch_gpu`
-    if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job != 'run_quantization_torch_gpu' && needs.send_results.result == 'success' }}
+    if: ${{ always() && inputs.ci_event == 'Daily CI' && needs.send_results.result == 'success' }}
     name: Check new failures
     needs: send_results
     uses: ./.github/workflows/check_failed_tests.yml
diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml
index c6aa336e8f4..5ef74946964 100644
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@@ -58,7 +58,7 @@ jobs:
         fi

     - name: Send message to Slack
-      if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
+      shell: bash
       env:
         CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
         CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
@@ -79,7 +79,11 @@ jobs:
        pip install huggingface_hub
        pip install slack_sdk
        pip show slack_sdk
-       python utils/notification_service.py "${{ inputs.folder_slices }}"
+       if [ "${{ inputs.quantization_matrix }}" != "" ]; then
+         python utils/notification_service.py "${{ inputs.quantization_matrix }}"
+       else
+         python utils/notification_service.py "${{ inputs.folder_slices }}"
+       fi

     # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
     - name: Failure table artifacts
@@ -87,32 +91,3 @@
       with:
         name: ci_results_${{ inputs.job }}
         path: ci_results_${{ inputs.job }}
-
-    - uses: actions/checkout@v4
-    - uses: actions/download-artifact@v4
-    - name: Send message to Slack for quantization workflow
-      if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
-      env:
-        CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-        ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-        SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
-        CI_EVENT: ${{ inputs.ci_event }}
-        CI_SHA: ${{ github.sha }}
-        CI_TEST_JOB: ${{ inputs.job }}
-        SETUP_STATUS: ${{ inputs.setup_status }}
-        REPORT_REPO_ID: ${{ inputs.report_repo_id }}
-      # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
-      # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
-      run: |
-        pip install huggingface_hub
-        pip install slack_sdk
-        pip show slack_sdk
-        python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
-
-    # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
-    - name: Failure table artifacts
-      if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
-      uses: actions/upload-artifact@v4
-      with:
-        name: ci_results_${{ inputs.job }}
-        path: ci_results_${{ inputs.job }}
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 82e6cd1add5..5c54809b262 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -31,8 +31,7 @@ from slack_sdk import WebClient

 # A map associating the job names (specified by `inputs.job` in a workflow file) with the keys of
-# `additional_files`. This is used to remove some entries in `additional_files` that are not concerned by a
-# specific job. See below.
+# `additional_files`.
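+# The values also double as the human-readable test names used in the Slack report,
+# e.g. via `test_name = job_to_test_map[job_name]` further below.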
 job_to_test_map = {
     "run_models_gpu": "Models",
     "run_trainer_and_fsdp_gpu": "Trainer & FSDP",
@@ -40,6 +39,18 @@ job_to_test_map = {
     "run_pipelines_tf_gpu": "TensorFlow pipelines",
     "run_examples_gpu": "Examples directory",
     "run_torch_cuda_extensions_gpu": "DeepSpeed",
+    "run_quantization_torch_gpu": "Quantization",
+}
+
+# The values are used as the file names where to save the corresponding CI job results.
+test_to_result_name = {
+    "Models": "model",
+    "Trainer & FSDP": "trainer_and_fsdp",
+    "PyTorch pipelines": "torch_pipeline",
+    "TensorFlow pipelines": "tf_pipeline",
+    "Examples directory": "example",
+    "DeepSpeed": "deepspeed",
+    "Quantization": "quantization",
 }

 NON_MODEL_TEST_MODULES = [
@@ -53,6 +64,8 @@ NON_MODEL_TEST_MODULES = [
     "sagemaker",
     "trainer",
     "utils",
+    "fsdp",
+    "quantization",
 ]

@@ -221,7 +234,6 @@ class Message:
                 "type": "plain_text",
                 "text": (
                     f"There were {self.n_failures} failures, out of {self.n_tests} tests.\n"
-                    f"Number of model failures: {self.n_model_failures}.\n"
                     f"The suite ran in {self.time}."
                 ),
                 "emoji": True,
@@ -276,6 +288,10 @@ class Message:

     @property
     def category_failures(self) -> Dict:
+        if job_name != "run_models_gpu":
+            category_failures_report = ""
+            return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
+
         model_failures = [v["failed"] for v in self.model_results.values()]

         category_failures = {}
@@ -301,7 +317,7 @@
         header = "Single | Multi | Category\n"
         category_failures_report = prepare_reports(
-            title="The following modeling categories had failures", header=header, reports=individual_reports
+            title="The following categories had failures", header=header, reports=individual_reports
         )

         return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
@@ -355,25 +371,40 @@ class Message:
         }

         for k, v in self.model_results.items():
+            # The keys in `model_results` may contain things like `models_vit` or `quantization_autoawq`
+            # Remove the prefix to make the report cleaner.
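+            # (Illustrative examples: "models_vit" -> "vit", "quantization_autoawq" -> "autoawq".)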
+            k = k.replace("models_", "").replace("quantization_", "")
             if k in NON_MODEL_TEST_MODULES:
-                pass
+                continue

             if sum(per_model_sum(v).values()):
                 dict_failed = dict(v["failed"])
-                pytorch_specific_failures = dict_failed.pop("PyTorch")
-                tensorflow_specific_failures = dict_failed.pop("TensorFlow")
-                other_failures = dicts_to_sum(dict_failed.values())

-                failures[k] = {
-                    "PyTorch": pytorch_specific_failures,
-                    "TensorFlow": tensorflow_specific_failures,
-                    "other": other_failures,
-                }
+                # Model job has a special form for reporting
+                if job_name == "run_models_gpu":
+                    pytorch_specific_failures = dict_failed.pop("PyTorch")
+                    tensorflow_specific_failures = dict_failed.pop("TensorFlow")
+                    other_failures = dicts_to_sum(dict_failed.values())
+
+                    failures[k] = {
+                        "PyTorch": pytorch_specific_failures,
+                        "TensorFlow": tensorflow_specific_failures,
+                        "other": other_failures,
+                    }
+
+                else:
+                    test_name = job_to_test_map[job_name]
+                    specific_failures = dict_failed.pop(test_name)
+                    failures[k] = {
+                        test_name: specific_failures,
+                    }

         model_reports = []
         other_module_reports = []

         for key, value in non_model_failures.items():
+            key = key.replace("models_", "").replace("quantization_", "")
+
             if key in NON_MODEL_TEST_MODULES:
                 device_report = self.get_device_report(value)

@@ -386,44 +417,60 @@ class Message:
                 other_module_reports.append(report)

         for key, value in failures.items():
-            device_report_values = [
-                value["PyTorch"]["single"],
-                value["PyTorch"]["multi"],
-                value["TensorFlow"]["single"],
-                value["TensorFlow"]["multi"],
-                sum(value["other"].values()),
-            ]
+            # Model job has a special form for reporting
+            if job_name == "run_models_gpu":
+                device_report_values = [
+                    value["PyTorch"]["single"],
+                    value["PyTorch"]["multi"],
+                    value["TensorFlow"]["single"],
+                    value["TensorFlow"]["multi"],
+                    sum(value["other"].values()),
+                ]
+
+            else:
+                test_name = job_to_test_map[job_name]
+                device_report_values = [
+                    value[test_name]["single"],
+                    value[test_name]["multi"],
+                ]

             if sum(device_report_values):
-                device_report = " | ".join([str(x).rjust(9) for x in device_report_values]) + " | "
+                # This is related to `model_header` below
+                rjust_width = 9 if job_name == "run_models_gpu" else 6
+                device_report = " | ".join([str(x).rjust(rjust_width) for x in device_report_values]) + " | "
                 report = f"{device_report}{key}"
                 model_reports.append(report)

         # (Possibly truncated) reports for the current workflow run - to be sent to Slack channels
-        model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
+        if job_name == "run_models_gpu":
+            model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
+        else:
+            model_header = "Single | Multi | Category\n"
+
+        # Used when calling `prepare_reports` below to prepare the `title` argument
+        label = test_to_result_name[job_to_test_map[job_name]]
+
         sorted_model_reports = sorted(model_reports, key=lambda s: s.split("| ")[-1])
         model_failures_report = prepare_reports(
-            title="These following model modules had failures", header=model_header, reports=sorted_model_reports
+            title=f"These following {label} modules had failures", header=model_header, reports=sorted_model_reports
         )

         module_header = "Single | Multi | Category\n"
         sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("| ")[-1])
         module_failures_report = prepare_reports(
-            title="The following non-model modules had failures", header=module_header, reports=sorted_module_reports
+            title=f"The following {label} modules had failures", header=module_header, reports=sorted_module_reports
         )

         # To be sent to Slack channels
-        model_failure_sections = [
-            {"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}},
-            {"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}},
-        ]
+        model_failure_sections = [{"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}}]
+        model_failure_sections.append({"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}})

         # Save the complete (i.e. no truncation) failure tables (of the current workflow run)
         # (to be uploaded as artifacts)
         model_failures_report = prepare_reports(
-            title="These following model modules had failures",
+            title=f"These following {label} modules had failures",
             header=model_header,
             reports=sorted_model_reports,
             to_truncate=False,
@@ -433,7 +480,7 @@ class Message:
             fp.write(model_failures_report)

         module_failures_report = prepare_reports(
-            title="The following non-model modules had failures",
+            title=f"The following {label} modules had failures",
             header=module_header,
             reports=sorted_module_reports,
             to_truncate=False,
@@ -511,7 +558,10 @@ class Message:
             blocks.append(self.failures)

         if self.n_model_failures > 0:
-            blocks.append(self.category_failures)
+            block = self.category_failures
+            if block["text"]["text"]:
+                blocks.append(block)
+
             for block in self.model_failures:
                 if block["text"]["text"]:
                     blocks.append(block)
@@ -565,7 +615,7 @@ class Message:
                 pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
                 items = re.findall(pattern, line)
             elif "tests/" in line:
-                if "tests/models/" in line:
+                if "tests/models/" in line or "tests/quantization/" in line:
                     model = line.split("/")[2]
                 else:
                     model = line.split("/")[1]
@@ -609,7 +659,7 @@ class Message:
                 "text": {
                     "type": "mrkdwn",
                     # TODO: We should NOT assume it's always Nvidia CI, but it's the case at this moment.
-                    "text": f"*There are {nb_new_failed_tests} failed tests unique to this run*\n\n(compared to Nvidia CI: )",
+                    "text": f"*There are {nb_new_failed_tests} failed tests unique to {'this run' if not is_amd_daily_ci_workflow else 'AMD'}*\n\n(compared to Nvidia CI: )",
                 },
                 "accessory": {
                     "type": "button",
@@ -1058,13 +1108,24 @@ if __name__ == "__main__":
     # In our usage in `.github/workflows/slack-report.yml`, we always pass an argument when calling this script.
     # The argument could be an empty string `""` if a job doesn't depend on the job `setup`.
     if arguments[0] == "":
-        models = []
+        job_matrix = []
     else:
-        model_list_as_str = arguments[0]
+        job_matrix_as_str = arguments[0]
         try:
-            folder_slices = ast.literal_eval(model_list_as_str)
-            # Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
-            models = [x.replace("models/", "models_") for folders in folder_slices for x in folders]
+            folder_slices = ast.literal_eval(job_matrix_as_str)
+            if len(folder_slices) > 0:
+                if isinstance(folder_slices[0], list):
+                    # Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
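+                    # Illustrative (hypothetical) input/output:
+                    #   [["models/bert", "quantization/bnb"]] -> ["models_bert", "quantization_bnb"]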
+                    job_matrix = [
+                        x.replace("models/", "models_").replace("quantization/", "quantization_")
+                        for folders in folder_slices
+                        for x in folders
+                    ]
+                elif isinstance(folder_slices[0], str):
+                    job_matrix = [
+                        x.replace("models/", "models_").replace("quantization/", "quantization_")
+                        for x in folder_slices
+                    ]
         except Exception:
             Message.error_out(title, ci_title)
             raise ValueError("Errored out.")
@@ -1084,7 +1145,7 @@ if __name__ == "__main__":

     available_artifacts = retrieve_available_artifacts()

-    modeling_categories = [
+    test_categories = [
         "PyTorch",
         "TensorFlow",
         "Flax",
@@ -1093,35 +1154,34 @@ if __name__ == "__main__":
         "Trainer",
         "ONNX",
         "Auto",
+        "Quantization",
         "Unclassified",
     ]

     job_name = os.getenv("CI_TEST_JOB")
-    report_name_prefix = "run_models_gpu"
-    if job_name == "run_trainer_and_fsdp_gpu":
-        report_name_prefix = job_name
+    report_name_prefix = job_name

     # This dict will contain all the information relative to each model:
     # - Failures: the total, as well as the number of failures per-category defined above
     # - Success: total
     # - Time spent: as a comma-separated list of elapsed time
     # - Failures: as a line-break separated list of errors
-    model_results = {
-        model: {
-            "failed": {m: {"unclassified": 0, "single": 0, "multi": 0} for m in modeling_categories},
+    matrix_job_results = {
+        matrix_name: {
+            "failed": {m: {"unclassified": 0, "single": 0, "multi": 0} for m in test_categories},
             "success": 0,
             "time_spent": "",
             "failures": {},
             "job_link": {},
         }
-        for model in models
-        if f"{report_name_prefix}_{model}_test_reports" in available_artifacts
+        for matrix_name in job_matrix
+        if f"{report_name_prefix}_{matrix_name}_test_reports" in available_artifacts
     }

     unclassified_model_failures = []

-    for model in model_results.keys():
-        for artifact_path_dict in available_artifacts[f"{report_name_prefix}_{model}_test_reports"].paths:
+    for matrix_name in matrix_job_results.keys():
+        for artifact_path_dict in available_artifacts[f"{report_name_prefix}_{matrix_name}_test_reports"].paths:
             path = artifact_path_dict["path"]
             artifact_gpu = artifact_path_dict["gpu"]
@@ -1133,13 +1193,14 @@ if __name__ == "__main__":
             if "stats" in artifact:
                 # Link to the GitHub Action job
                 job = artifact_name_to_job_map[path]
-                model_results[model]["job_link"][artifact_gpu] = job["html_url"]
+                matrix_job_results[matrix_name]["job_link"][artifact_gpu] = job["html_url"]
                 failed, success, time_spent = handle_test_results(artifact["stats"])
-                model_results[model]["success"] += success
-                model_results[model]["time_spent"] += time_spent[1:-1] + ", "
+                matrix_job_results[matrix_name]["success"] += success
+                matrix_job_results[matrix_name]["time_spent"] += time_spent[1:-1] + ", "

                 stacktraces = handle_stacktraces(artifact["failures_line"])

+                # TODO: ???
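+                # Each line of pytest's short summary that starts with `FAILED ` names one failing test,
+                # e.g. (illustrative) `FAILED tests/models/vit/test_modeling_vit.py::ViTModelTest::test_forward`.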
                 for line in artifact["summary_short"].split("\n"):
                     if line.startswith("FAILED "):
                         # Avoid the extra `FAILED` entry given by `run_test_using_subprocess` causing issue when calling
@@ -1150,38 +1211,45 @@ if __name__ == "__main__":
                         line = line[len("FAILED ") :]
                         line = line.split()[0].replace("\n", "")

-                        if artifact_gpu not in model_results[model]["failures"]:
-                            model_results[model]["failures"][artifact_gpu] = []
+                        if artifact_gpu not in matrix_job_results[matrix_name]["failures"]:
+                            matrix_job_results[matrix_name]["failures"][artifact_gpu] = []

                         trace = pop_default(stacktraces, 0, "Cannot retrieve error message.")
-                        model_results[model]["failures"][artifact_gpu].append({"line": line, "trace": trace})
+                        matrix_job_results[matrix_name]["failures"][artifact_gpu].append(
+                            {"line": line, "trace": trace}
+                        )

-                        if re.search("test_modeling_tf_", line):
-                            model_results[model]["failed"]["TensorFlow"][artifact_gpu] += 1
+                        # TODO: How to deal with this
+
+                        if re.search("tests/quantization", line):
+                            matrix_job_results[matrix_name]["failed"]["Quantization"][artifact_gpu] += 1
+
+                        elif re.search("test_modeling_tf_", line):
+                            matrix_job_results[matrix_name]["failed"]["TensorFlow"][artifact_gpu] += 1

                         elif re.search("test_modeling_flax_", line):
-                            model_results[model]["failed"]["Flax"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["Flax"][artifact_gpu] += 1

                         elif re.search("test_modeling", line):
-                            model_results[model]["failed"]["PyTorch"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["PyTorch"][artifact_gpu] += 1

                         elif re.search("test_tokenization", line):
-                            model_results[model]["failed"]["Tokenizers"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["Tokenizers"][artifact_gpu] += 1

                         elif re.search("test_pipelines", line):
-                            model_results[model]["failed"]["Pipelines"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["Pipelines"][artifact_gpu] += 1

                         elif re.search("test_trainer", line):
-                            model_results[model]["failed"]["Trainer"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["Trainer"][artifact_gpu] += 1

                         elif re.search("onnx", line):
-                            model_results[model]["failed"]["ONNX"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["ONNX"][artifact_gpu] += 1

                         elif re.search("auto", line):
-                            model_results[model]["failed"]["Auto"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["Auto"][artifact_gpu] += 1

                         else:
-                            model_results[model]["failed"]["Unclassified"][artifact_gpu] += 1
+                            matrix_job_results[matrix_name]["failed"]["Unclassified"][artifact_gpu] += 1
                             unclassified_model_failures.append(line)

     # Additional runs
@@ -1315,20 +1383,10 @@ if __name__ == "__main__":
         if "workflow_run" in event_payload:
             is_scheduled_ci_run = event_payload["workflow_run"]["event"] == "schedule"

-    # The values are used as the file names where to save the corresponding CI job results.
-    test_to_result_name = {
-        "Models": "model",
-        "Trainer & FSDP": "trainer_and_fsdp",
-        "PyTorch pipelines": "torch_pipeline",
-        "TensorFlow pipelines": "tf_pipeline",
-        "Examples directory": "example",
-        "DeepSpeed": "deepspeed",
-    }
-
     test_name_and_result_pairs = []
-    if len(model_results) > 0:
+    if len(matrix_job_results) > 0:
         test_name = job_to_test_map[job_name]
-        test_name_and_result_pairs.append((test_name, model_results))
+        test_name_and_result_pairs.append((test_name, matrix_job_results))

     for test_name, result in additional_results.items():
         test_name_and_result_pairs.append((test_name, result))
@@ -1346,8 +1404,8 @@ if __name__ == "__main__":
     )

     # Let's create a file contain job --> job link
-    if len(model_results) > 0:
-        target_results = model_results
+    if len(matrix_job_results) > 0:
+        target_results = matrix_job_results
     else:
         target_results = additional_results[job_to_test_map[job_name]]

@@ -1360,6 +1418,8 @@ if __name__ == "__main__":
     for job, job_result in sorted_dict:
         if job.startswith("models_"):
             job = job[len("models_") :]
+        elif job.startswith("quantization_"):
+            job = job[len("quantization_") :]
         job_links[job] = job_result["job_link"]

     with open(f"ci_results_{job_name}/job_links.json", "w", encoding="UTF-8") as fp:
@@ -1424,7 +1484,7 @@ if __name__ == "__main__":
     message = Message(
         title,
         ci_title,
-        model_results,
+        matrix_job_results,
         additional_results,
         selected_warnings=selected_warnings,
         prev_ci_artifacts=prev_ci_artifacts,
diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py
deleted file mode 100644
index b533a7a9cf1..00000000000
--- a/utils/notification_service_quantization.py
+++ /dev/null
@@ -1,294 +0,0 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ast
-import json
-import os
-import sys
-import time
-from typing import Dict
-
-from get_ci_error_statistics import get_jobs
-from get_previous_daily_ci import get_last_daily_ci_run
-from huggingface_hub import HfApi
-from notification_service import (
-    Message,
-    handle_stacktraces,
-    handle_test_results,
-    prepare_reports,
-    retrieve_artifact,
-    retrieve_available_artifacts,
-)
-from slack_sdk import WebClient
-
-
-api = HfApi()
-client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
-
-
-class QuantizationMessage(Message):
-    def __init__(
-        self,
-        title: str,
-        results: Dict,
-    ):
-        self.title = title
-
-        # Failures and success of the modeling tests
-        self.n_success = sum(r["success"] for r in results.values())
-        self.single_gpu_failures = sum(r["failed"]["single"] for r in results.values())
-        self.multi_gpu_failures = sum(r["failed"]["multi"] for r in results.values())
-        self.n_failures = self.single_gpu_failures + self.multi_gpu_failures
-
-        self.n_tests = self.n_failures + self.n_success
-        self.results = results
-        self.thread_ts = None
-
-    @property
-    def payload(self) -> str:
-        blocks = [self.header]
-
-        if self.n_failures > 0:
-            blocks.append(self.failures_overwiew)
-            blocks.append(self.failures_detailed)
-
-        if self.n_failures == 0:
-            blocks.append(self.no_failures)
-
-        return json.dumps(blocks)
-
-    @property
-    def time(self) -> str:
-        all_results = self.results.values()
-        time_spent = []
-        for r in all_results:
-            if len(r["time_spent"]):
-                time_spent.extend([x for x in r["time_spent"].split(", ") if len(x.strip())])
-        total_secs = 0
-
-        for time in time_spent:
-            time_parts = time.split(":")
-
-            # Time can be formatted as xx:xx:xx, as .xx, or as x.xx if the time spent was less than a minute.
-            if len(time_parts) == 1:
-                time_parts = [0, 0, time_parts[0]]
-
-            hours, minutes, seconds = int(time_parts[0]), int(time_parts[1]), float(time_parts[2])
-            total_secs += hours * 3600 + minutes * 60 + seconds
-
-        hours, minutes, seconds = total_secs // 3600, (total_secs % 3600) // 60, total_secs % 60
-        return f"{int(hours)}h{int(minutes)}m{int(seconds)}s"
-
-    @property
-    def failures_overwiew(self) -> Dict:
-        return {
-            "type": "section",
-            "text": {
-                "type": "plain_text",
-                "text": (
-                    f"There were {self.n_failures} failures, out of {self.n_tests} tests.\n"
-                    f"The suite ran in {self.time}."
-                ),
-                "emoji": True,
-            },
-            "accessory": {
-                "type": "button",
-                "text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
-                "url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
-            },
-        }
-
-    @property
-    def failures_detailed(self) -> Dict:
-        failures = {k: v["failed"] for k, v in self.results.items()}
-
-        individual_reports = []
-        for key, value in failures.items():
-            device_report = self.get_device_report(value)
-            if sum(value.values()):
-                report = f"{device_report}{key}"
-                individual_reports.append(report)
-
-        header = "Single | Multi | Category\n"
-        failures_report = prepare_reports(
-            title="The following quantization tests had failures", header=header, reports=individual_reports
-        )
-
-        return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}}
-
-    def post(self):
-        payload = self.payload
-        print("Sending the following payload")
-        print(json.dumps({"blocks": json.loads(payload)}))
-
-        text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."
-
-        self.thread_ts = client.chat_postMessage(
-            channel=SLACK_REPORT_CHANNEL_ID,
-            blocks=payload,
-            text=text,
-        )
-
-    def post_reply(self):
-        if self.thread_ts is None:
-            raise ValueError("Can only post reply if a post has been made.")
-
-        for job, job_result in self.results.items():
-            if len(job_result["failures"]):
-                for device, failures in job_result["failures"].items():
-                    blocks = self.get_reply_blocks(
-                        job,
-                        job_result,
-                        failures,
-                        device,
-                        text=f"Number of failures: {job_result['failed'][device]}",
-                    )
-
-                    print("Sending the following reply")
-                    print(json.dumps({"blocks": blocks}))
-
-                    client.chat_postMessage(
-                        channel="#transformers-ci-daily-quantization",
-                        text=f"Results for {job}",
-                        blocks=blocks,
-                        thread_ts=self.thread_ts["ts"],
-                    )
-                    time.sleep(1)
-
-
-if __name__ == "__main__":
-    setup_status = os.environ.get("SETUP_STATUS")
-    SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"]
-    setup_failed = True if setup_status is not None and setup_status != "success" else False
-
-    # This env. variable is set in workflow file (under the job `send_results`).
-    ci_event = os.environ["CI_EVENT"]
-
-    title = f"🤗 Results of the {ci_event} - {os.getenv('CI_TEST_JOB')}."
-
-    if setup_failed:
-        Message.error_out(
-            title, ci_title="", runner_not_available=False, runner_failed=False, setup_failed=setup_failed
-        )
-        exit(0)
-
-    arguments = sys.argv[1:][0]
-    try:
-        quantization_matrix = ast.literal_eval(arguments)
-        # Need to change from elements like `quantization/bnb` to `quantization_bnb` (the ones used as artifact names).
-        quantization_matrix = [x.replace("quantization/", "quantization_") for x in quantization_matrix]
-    except SyntaxError:
-        Message.error_out(title, ci_title="")
-        raise ValueError("Errored out.")
-
-    available_artifacts = retrieve_available_artifacts()
-
-    quantization_results = {
-        quant: {
-            "failed": {"single": 0, "multi": 0},
-            "success": 0,
-            "time_spent": "",
-            "failures": {},
-            "job_link": {},
-        }
-        for quant in quantization_matrix
-        if f"run_quantization_torch_gpu_{quant}_test_reports" in available_artifacts
-    }
-
-    github_actions_jobs = get_jobs(
-        workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"]
-    )
-    github_actions_job_links = {job["name"]: job["html_url"] for job in github_actions_jobs}
-
-    artifact_name_to_job_map = {}
-    for job in github_actions_jobs:
-        for step in job["steps"]:
-            if step["name"].startswith("Test suite reports artifacts: "):
-                artifact_name = step["name"][len("Test suite reports artifacts: ") :]
-                artifact_name_to_job_map[artifact_name] = job
-                break
-
-    for quant in quantization_results.keys():
-        for artifact_path in available_artifacts[f"run_quantization_torch_gpu_{quant}_test_reports"].paths:
-            artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
-            if "stats" in artifact:
-                # Link to the GitHub Action job
-                job = artifact_name_to_job_map[artifact_path["path"]]
-                quantization_results[quant]["job_link"][artifact_path["gpu"]] = job["html_url"]
-                failed, success, time_spent = handle_test_results(artifact["stats"])
-                quantization_results[quant]["failed"][artifact_path["gpu"]] += failed
-                quantization_results[quant]["success"] += success
-                quantization_results[quant]["time_spent"] += time_spent[1:-1] + ", "
-
-                stacktraces = handle_stacktraces(artifact["failures_line"])
-
-                for line in artifact["summary_short"].split("\n"):
-                    if line.startswith("FAILED "):
-                        line = line[len("FAILED ") :]
-                        line = line.split()[0].replace("\n", "")
-
-                        if artifact_path["gpu"] not in quantization_results[quant]["failures"]:
-                            quantization_results[quant]["failures"][artifact_path["gpu"]] = []
-
-                        quantization_results[quant]["failures"][artifact_path["gpu"]].append(
-                            {"line": line, "trace": stacktraces.pop(0)}
-                        )
-
-    job_name = os.getenv("CI_TEST_JOB")
-
-    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
-    report_repo_subfolder = ""
-    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
-        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
-        report_repo_subfolder = f"runs/{report_repo_subfolder}"
-
-    workflow_run = get_last_daily_ci_run(
-        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
-    )
-    workflow_run_created_time = workflow_run["created_at"]
-    workflow_id = workflow_run["workflow_id"]
-
-    report_repo_folder = workflow_run_created_time.split("T")[0]
-
-    if report_repo_subfolder:
-        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
-
-    if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
-        os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
-
-    nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
-    is_nvidia_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(nvidia_daily_ci_workflow)
-    is_scheduled_ci_run = os.environ.get("GITHUB_EVENT_NAME") == "schedule"
-
-    with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp:
-        json.dump(quantization_results, fp, indent=4, ensure_ascii=False)
-
-    report_repo_id = os.getenv("REPORT_REPO_ID")
-
-    # upload results to Hub dataset (only for the scheduled daily CI run on `main`)
-    api.upload_file(
-        path_or_fileobj=f"ci_results_{job_name}/quantization_results.json",
-        path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/quantization_results.json",
-        repo_id=report_repo_id,
-        repo_type="dataset",
-        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
-    )
-
-    message = QuantizationMessage(
-        title,
-        results=quantization_results,
-    )
-
-    message.post()
-    message.post_reply()