From db2f5354439f887f4ae0a46fb3f4a6dd4bec3b45 Mon Sep 17 00:00:00 2001
From: jiqing-feng
Date: Wed, 2 Jul 2025 02:06:37 +0800
Subject: [PATCH 1/5] update bnb ground truth (#39117)

* update bnb results

Signed-off-by: jiqing-feng

* set seed to avoid sampling different results

Signed-off-by: jiqing-feng

* fix int8 tests

Signed-off-by: jiqing-feng

* fix typo

Signed-off-by: jiqing-feng

* add comments

Signed-off-by: jiqing-feng

---------

Signed-off-by: jiqing-feng
---
 tests/quantization/bnb/test_4bit.py       | 5 +++++
 tests/quantization/bnb/test_mixed_int8.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
index 9dc0bc396d9..fd72d13505c 100644
--- a/tests/quantization/bnb/test_4bit.py
+++ b/tests/quantization/bnb/test_4bit.py
@@ -27,6 +27,7 @@ from transformers import (
     AutoTokenizer,
     BitsAndBytesConfig,
     pipeline,
+    set_seed,
 )
 from transformers.models.opt.modeling_opt import OPTAttention
 from transformers.testing_utils import (
@@ -111,6 +112,8 @@ class Base4bitTest(unittest.TestCase):
     EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University")
     EXPECTED_OUTPUTS.add("Hello my name is John and I am 25 years old.")
     EXPECTED_OUTPUTS.add("Hello my name is John and I am a student at the University of")
+    # Expected values on Intel XPU and NV A100
+    EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
     MAX_NEW_TOKENS = 10
 
     def setUp(self):
@@ -513,6 +516,8 @@ class Pipeline4BitTest(Base4bitTest):
             max_new_tokens=self.MAX_NEW_TOKENS,
         )
 
+        # Avoid sampling different outputs
+        set_seed(42)
         # Real second forward pass
         pipeline_output = self.pipe(self.input_text)
         self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)
diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
index 01755d8feee..304d97879f2 100644
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -27,6 +27,7 @@ from transformers import (
     AutoTokenizer,
     BitsAndBytesConfig,
     pipeline,
+    set_seed,
 )
 from transformers.models.opt.modeling_opt import OPTAttention
 from transformers.testing_utils import (
@@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase):
     MAX_NEW_TOKENS = 10
     # Expected values with offload
     EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in")
+    # Expected values on Intel XPU and NV A100
+    EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
 
     def setUp(self):
         # Models and tokenizer
@@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test):
             max_new_tokens=self.MAX_NEW_TOKENS,
         )
 
+        # Avoid sampling different outputs
+        set_seed(42)
         # Real second forward pass
         pipeline_output = self.pipe(self.input_text)
         self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)

From ab59cc27fe1e166095f1b53e050a718fa7e86f34 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Tue, 1 Jul 2025 20:19:06 +0200
Subject: [PATCH 2/5] Suggest jobs to use in `run-slow` (#39100)

* pr

* pr

* pr

* pr

* pr

* pr

* pr

* pr

* pr

---------

Co-authored-by: ydshieh
---
 .github/workflows/get-pr-info.yml    | 157 ++++++++++++++++++++++++++
 .github/workflows/get-pr-number.yml  |  36 ++++++
 .github/workflows/pr_run_slow_ci.yml | 163 +++++++++++++++++++++++++++
 utils/get_pr_run_slow_jobs.py        | 133 ++++++++++++++++++++++
 4 files changed, 489 insertions(+)
 create mode 100644 .github/workflows/get-pr-info.yml
 create mode 100644 .github/workflows/get-pr-number.yml
 create mode 100644 .github/workflows/pr_run_slow_ci.yml
 create mode 100644 utils/get_pr_run_slow_jobs.py

diff --git a/.github/workflows/get-pr-info.yml b/.github/workflows/get-pr-info.yml
new file mode 100644
index 00000000000..989281e5b90
--- /dev/null
+++ b/.github/workflows/get-pr-info.yml
@@ -0,0 +1,157 @@
+name: Get PR commit SHA
+on:
+  workflow_call:
+    inputs:
+      pr_number:
+        required: true
+        type: string
+    outputs:
+      PR_HEAD_REPO_FULL_NAME:
+        description: "The full name of the repository from which the pull request is created"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
+      PR_BASE_REPO_FULL_NAME:
+        description: "The full name of the repository to which the pull request is created"
+        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
+      PR_HEAD_REPO_OWNER:
+        description: "The owner of the repository from which the pull request is created"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
+      PR_BASE_REPO_OWNER:
+        description: "The owner of the repository to which the pull request is created"
+        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
+      PR_HEAD_REPO_NAME:
+        description: "The name of the repository from which the pull request is created"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
+      PR_BASE_REPO_NAME:
+        description: "The name of the repository to which the pull request is created"
+        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
+      PR_HEAD_REF:
+        description: "The branch name of the pull request in the head repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
+      PR_BASE_REF:
+        description: "The branch name in the base repository (to merge into)"
+        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
+      PR_HEAD_SHA:
+        description: "The head sha of the pull request branch in the head repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
+      PR_BASE_SHA:
+        description: "The head sha of the target branch in the base repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
+      PR_MERGE_COMMIT_SHA:
+        description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
+      PR_HEAD_COMMIT_DATE:
+        description: "The date of the head sha of the pull request branch in the head repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
+      PR_MERGE_COMMIT_DATE:
+        description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
+      PR_HEAD_COMMIT_TIMESTAMP:
+        description: "The timestamp of the head sha of the pull request branch in the head repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
+      PR_MERGE_COMMIT_TIMESTAMP:
+        description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
+        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
+      PR:
+        description: "The PR"
+        value: ${{ jobs.get-pr-info.outputs.PR }}
+      PR_FILES:
+        description: "The files touched in the PR"
+        value: ${{ jobs.get-pr-info.outputs.PR_FILES }}
+
+
+jobs:
+  get-pr-info:
+    runs-on: ubuntu-22.04
+    name: Get PR commit SHA better
+    outputs:
+      PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
+      PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
+      PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
+      PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
+      PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
+      PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
+      PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
+      PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
+      PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
+      PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
+      PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
+      PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
+      PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
+      PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
+      PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
+      PR: ${{ steps.pr_info.outputs.pr }}
+      PR_FILES: ${{ steps.pr_info.outputs.files }}
+    if: ${{ inputs.pr_number != '' }}
+    steps:
+      - name: Extract PR details
+        id: pr_info
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: ${{ inputs.pr_number }}
+            });
+
+            const { data: head_commit } = await github.rest.repos.getCommit({
+              owner: pr.head.repo.owner.login,
+              repo: pr.head.repo.name,
+              ref: pr.head.ref
+            });
+
+            const { data: merge_commit } = await github.rest.repos.getCommit({
+              owner: pr.base.repo.owner.login,
+              repo: pr.base.repo.name,
+              ref: pr.merge_commit_sha,
+            });
+
+            const { data: files } = await github.rest.pulls.listFiles({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: ${{ inputs.pr_number }}
+            });
+
+            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
+            core.setOutput('base_repo_full_name', pr.base.repo.full_name);
+            core.setOutput('head_repo_owner', pr.head.repo.owner.login);
+            core.setOutput('base_repo_owner', pr.base.repo.owner.login);
+            core.setOutput('head_repo_name', pr.head.repo.name);
+            core.setOutput('base_repo_name', pr.base.repo.name);
+            core.setOutput('head_ref', pr.head.ref);
+            core.setOutput('base_ref', pr.base.ref);
+            core.setOutput('head_sha', pr.head.sha);
+            core.setOutput('base_sha', pr.base.sha);
+            core.setOutput('merge_commit_sha', pr.merge_commit_sha);
+            core.setOutput('pr', pr);
+
+            core.setOutput('head_commit_date', head_commit.commit.committer.date);
+            core.setOutput('merge_commit_date', merge_commit.commit.committer.date);
+
+            core.setOutput('files', files);
+
+            console.log('PR head commit:', {
+              head_commit: head_commit,
+              commit: head_commit.commit,
+              date: head_commit.commit.committer.date
+            });
+
+            console.log('PR merge commit:', {
+              merge_commit: merge_commit,
+              commit: merge_commit.commit,
+              date: merge_commit.commit.committer.date
+            });
+
+      - name: Convert dates to timestamps
+        id: get_timestamps
+        run: |
+          head_commit_date=${{ steps.pr_info.outputs.head_commit_date }}
+          merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }}
+          echo $head_commit_date
+          echo $merge_commit_date
+          head_commit_timestamp=$(date -d "$head_commit_date" +%s)
+          merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
+          echo $head_commit_timestamp
+          echo $merge_commit_timestamp
+          echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
+          echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/get-pr-number.yml b/.github/workflows/get-pr-number.yml
new file mode 100644
index 00000000000..316b0f7503f
--- /dev/null
+++ b/.github/workflows/get-pr-number.yml
@@ -0,0 +1,36 @@
+name: Get PR number
+on:
+  workflow_call:
+    outputs:
+      PR_NUMBER:
+        description: "The extracted PR number"
+        value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}
+
+jobs:
+  get-pr-number:
+    runs-on: ubuntu-22.04
+    name: Get PR number
+    outputs:
+      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
+    steps:
+      - name: Get PR number
+        shell: bash
+        run: |
+          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
+            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
+          elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
+            echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
+          elif [[ "${{ github.event.pull_request }}" != "" ]]; then
+            echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
+          else
+            echo "PR_NUMBER=" >> $GITHUB_ENV
+          fi
+
+      - name: Check PR number
+        shell: bash
+        run: |
+          echo "${{ env.PR_NUMBER }}"
+
+      - name: Set PR number
+        id: set_pr_number
+        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
diff --git a/.github/workflows/pr_run_slow_ci.yml b/.github/workflows/pr_run_slow_ci.yml
new file mode 100644
index 00000000000..f3070a6f4d2
--- /dev/null
+++ b/.github/workflows/pr_run_slow_ci.yml
@@ -0,0 +1,163 @@
+name: PR slow CI
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  get-pr-number:
+    name: Get PR number
+    uses: ./.github/workflows/get-pr-number.yml
+
+  get-pr-info:
+    name: Get PR commit SHA
+    needs: get-pr-number
+    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
+    uses: ./.github/workflows/get-pr-info.yml
+    with:
+      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
+
+  # We only need to verify the timestamp if the workflow is triggered by `issue_comment`.
+  verify_pr_commit:
+    name: Verify PR commit corresponds to a specific event by comparing timestamps
+    if: ${{ github.event.comment.created_at != '' }}
+    runs-on: ubuntu-22.04
+    needs: get-pr-info
+    env:
+      COMMENT_DATE: ${{ github.event.comment.created_at }}
+      PR_MERGE_COMMIT_DATE: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
+      PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
+    steps:
+      - run: |
+          COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
+          echo "COMMENT_DATE: $COMMENT_DATE"
+          echo "PR_MERGE_COMMIT_DATE: $PR_MERGE_COMMIT_DATE"
+          echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
+          echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
+          if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
+            echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
+            exit -1;
+          fi
+
+  get-jobs:
+    name: Get test files to run
+    runs-on: ubuntu-22.04
+    needs: [get-pr-number, get-pr-info]
+    outputs:
+      jobs: ${{ steps.get_jobs.outputs.jobs_to_run }}
+    steps:
+      - name: Get repository content
+        id: repo_content
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const { data: tests_dir } = await github.rest.repos.getContent({
+              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
+              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
+              path: 'tests',
+              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
+            });
+
+            const { data: tests_models_dir } = await github.rest.repos.getContent({
+              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
+              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
+              path: 'tests/models',
+              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
+            });
+
+            const { data: tests_quantization_dir } = await github.rest.repos.getContent({
+              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
+              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
+              path: 'tests/quantization',
+              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
+            });
+
+            core.setOutput('tests_dir', tests_dir);
+            core.setOutput('tests_models_dir', tests_models_dir);
+            core.setOutput('tests_quantization_dir', tests_quantization_dir);
+
+      # This checks out the main branch
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: "0"
+
+      - name: Write pr_files file
+        run: |
+          cat > pr_files.txt << 'EOF'
+          ${{ needs.get-pr-info.outputs.PR_FILES }}
+          EOF
+
+      - name: Write tests_dir file
+        run: |
+          cat > tests_dir.txt << 'EOF'
+          ${{ steps.repo_content.outputs.tests_dir }}
+          EOF
+
+      - name: Write tests_models_dir file
+        run: |
+          cat > tests_models_dir.txt << 'EOF'
+          ${{ steps.repo_content.outputs.tests_models_dir }}
+          EOF
+
+      - name: Write tests_quantization_dir file
+        run: |
+          cat > tests_quantization_dir.txt << 'EOF'
+          ${{ steps.repo_content.outputs.tests_quantization_dir }}
+          EOF
+
+      - name: Run script to get jobs to run
+        id: get_jobs
+        run: |
+          python utils/get_pr_run_slow_jobs.py | tee output.txt
+          echo "jobs_to_run: $(tail -n 1 output.txt)"
+          echo "jobs_to_run=$(tail -n 1 output.txt)" >> $GITHUB_OUTPUT
+
+  send_comment:
+    name: Send a comment to suggest jobs to run
+    if: ${{ needs.get-jobs.outputs.jobs != '' }}
+    needs: [get-pr-number, get-jobs]
+    permissions:
+      pull-requests: write
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Delete existing comment and send new one
+        uses: actions/github-script@v7
+        env:
+          BODY: "\n\nrun-slow: ${{ needs.get-jobs.outputs.jobs }}"
+        with:
+          script: |
+            const prNumber = ${{ needs.get-pr-number.outputs.PR_NUMBER }};
+            const commentPrefix = "**[For maintainers]** Suggested jobs to run (before merge)";
+
+            // Get all comments on the PR
+            const { data: comments } = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber
+            });
+
+            // Find existing comment(s) that start with our prefix
+            const existingComments = comments.filter(comment =>
+              comment.user.login === 'github-actions[bot]' &&
+              comment.body.startsWith(commentPrefix)
+            );
+
+            // Delete existing comment(s)
+            for (const comment of existingComments) {
+              console.log(`Deleting existing comment #${comment.id}`);
+              await github.rest.issues.deleteComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: comment.id
+              });
+            }
+
+            // Create new comment
+            const newBody = `${commentPrefix}${process.env.BODY}`;
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: newBody
+            });
+
+            console.log('✅ Comment updated successfully');
\ No newline at end of file
diff --git a/utils/get_pr_run_slow_jobs.py b/utils/get_pr_run_slow_jobs.py
new file mode 100644
index 00000000000..fa56a6c305e
--- /dev/null
+++ b/utils/get_pr_run_slow_jobs.py
@@ -0,0 +1,133 @@
+import argparse
+import json
+import re
+import string
+
+
+MAX_NUM_JOBS_TO_SUGGEST = 16
+
+
+def get_jobs_to_run():
+    # The file `pr_files.txt` contains the information about the files changed in a pull request, and it is prepared by
+    # the caller (using GitHub api).
+    # We can also use the following api to get the information if we don't have them before calling this script.
+    # url = f"https://api.github.com/repos/huggingface/transformers/pulls/PULL_NUMBER/files?ref={pr_sha}"
+    with open("pr_files.txt") as fp:
+        pr_files = json.load(fp)
+        pr_files = [{k: v for k, v in item.items() if k in ["filename", "status"]} for item in pr_files]
+        pr_files = [item["filename"] for item in pr_files if item["status"] in ["added", "modified"]]
+
+    # models or quantizers
+    re_1 = re.compile(r"src/transformers/(models/.*)/modeling_.*\.py")
+    re_2 = re.compile(r"src/transformers/(quantizers/quantizer_.*)\.py")
+
+    # tests for models or quantizers
+    re_3 = re.compile(r"tests/(models/.*)/test_.*\.py")
+    re_4 = re.compile(r"tests/(quantization/.*)/test_.*\.py")
+
+    # files in a model directory but not necessarily a modeling file
+    re_5 = re.compile(r"src/transformers/(models/.*)/.*\.py")
+
+    regexes = [re_1, re_2, re_3, re_4, re_5]
+
+    jobs_to_run = []
+    for pr_file in pr_files:
+        for regex in regexes:
+            matched = regex.findall(pr_file)
+            if len(matched) > 0:
+                item = matched[0]
+                item = item.replace("quantizers/quantizer_", "quantization/")
+                # TODO: for files in `quantizers`, the processed item above may not exist. Try using a fuzzy matching
+                if item in repo_content:
+                    jobs_to_run.append(item)
+                break
+    jobs_to_run = sorted(set(jobs_to_run))
+
+    return jobs_to_run
+
+
+def parse_message(message: str) -> str:
+    """
+    Parses a GitHub pull request's comment to find the models specified in it to run slow CI.
+
+    Args:
+        message (`str`): The body of a GitHub pull request's comment.
+
+    Returns:
+        `str`: The substring in `message` after `run-slow`, `run_slow`, or `run slow`. If no such prefix is found, the
+        empty string is returned.
+    """
+    if message is None:
+        return ""
+
+    message = message.strip().lower()
+
+    # run-slow: model_1, model_2, quantization_1, quantization_2
+    if not message.startswith(("run-slow", "run_slow", "run slow")):
+        return ""
+    message = message[len("run slow") :]
+    # remove leading `:`
+    while message.strip().startswith(":"):
+        message = message.strip()[1:]
+
+    return message
+
+
+def get_jobs(message: str):
+    models = parse_message(message)
+    return models.replace(",", " ").split()
+
+
+def check_name(model_name: str):
+    allowed = string.ascii_letters + string.digits + "_"
+    return not (model_name.startswith("_") or model_name.endswith("_")) and all(c in allowed for c in model_name)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--message", type=str, default="", help="The content of a comment.")
+    parser.add_argument("--quantization", action="store_true", help="If we collect quantization tests")
+    args = parser.parse_args()
+
+    # The files are prepared by the caller (using GitHub api).
+    # We can also use the following api to get the information if we don't have them before calling this script.
+    # url = f"https://api.github.com/repos/OWNER/REPO/contents/PATH?ref={pr_sha}"
+    # (we avoid checking out the repository using `actions/checkout` to reduce the run time, but mostly to avoid the potential security issues as much as possible)
+    repo_content = []
+    for filename in ["tests_dir.txt", "tests_models_dir.txt", "tests_quantization_dir.txt"]:
+        with open(filename) as fp:
+            data = json.load(fp)
+            data = [item["path"][len("tests/") :] for item in data if item["type"] == "dir"]
+            repo_content.extend(data)
+
+    # These don't have the prefix `models/` or `quantization/`, so we need to add them.
+    if args.message:
+        specified_jobs = get_jobs(args.message)
+        specified_jobs = [job for job in specified_jobs if check_name(job)]
+
+        # Add prefix (`models/` or `quantization/`)
+        jobs_to_run = []
+        for job in specified_jobs:
+            if not args.quantization:
+                if f"models/{job}" in repo_content:
+                    jobs_to_run.append(f"models/{job}")
+                elif job in repo_content and job != "quantization":
+                    jobs_to_run.append(job)
+            elif f"quantization/{job}" in repo_content:
+                jobs_to_run.append(f"quantization/{job}")
+
+        print(sorted(set(jobs_to_run)))
+
+    else:
+        # Compute (from the added/modified files) the directories under `tests/`, `tests/models/` and `tests/quantization` to run tests.
+        # These are already with the prefix `models/` or `quantization/`, so we don't need to add them.
+        jobs_to_run = get_jobs_to_run()
+        jobs_to_run = [x.replace("models/", "").replace("quantization/", "") for x in jobs_to_run]
+        jobs_to_run = [job for job in jobs_to_run if check_name(job)]
+
+        if len(jobs_to_run) > MAX_NUM_JOBS_TO_SUGGEST:
+            jobs_to_run = jobs_to_run[:MAX_NUM_JOBS_TO_SUGGEST]
+
+        suggestion = f"{', '.join(jobs_to_run)}"
+
+        print(suggestion)

From 4c1715b6109184b062198793c3922ae1cffa79f9 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Tue, 1 Jul 2025 20:54:31 +0200
Subject: [PATCH 3/5] Update expected values (after switching to A10) (#39157)

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* empty

* fix

* fix

---------

Co-authored-by: ydshieh
---
 .../test_modeling_conditional_detr.py           | 31 +++++++----
 tests/models/convnext/test_modeling_convnext.py |  4 +-
 tests/models/cvt/test_modeling_cvt.py           |  8 ++-
 tests/models/d_fine/test_modeling_d_fine.py     | 28 +++++-----
 .../models/dab_detr/test_modeling_dab_detr.py   | 28 +++++++---
 .../test_modeling_deformable_detr.py            | 51 ++++++++++------
 tests/models/detr/test_modeling_detr.py         | 54 +++++++++++------
 7 files changed, 138 insertions(+), 66 deletions(-)

diff --git a/tests/models/conditional_detr/test_modeling_conditional_detr.py b/tests/models/conditional_detr/test_modeling_conditional_detr.py
index f752e58c6af..813d2bd7967 100644
--- a/tests/models/conditional_detr/test_modeling_conditional_detr.py
+++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py
@@ -570,9 +570,14 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
         expected_shape = torch.Size((1, 300, 256))
         self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
         expected_slice = torch.tensor(
-            [[0.4222, 0.7471, 0.8760], [0.6395, -0.2729, 0.7127], [-0.3090, 0.7642, 0.9529]]
+            [
+                [0.4223, 0.7474, 0.8760],
+                [0.6397, -0.2727, 0.7126],
+                [-0.3089, 0.7643, 0.9529],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
+
+        torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
 
     def test_inference_object_detection_head(self):
         model = ConditionalDetrForObjectDetection.from_pretrained("microsoft/conditional-detr-resnet-50").to(
@@ -592,26 +597,34 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
         expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels))
         self.assertEqual(outputs.logits.shape, expected_shape_logits)
         expected_slice_logits = torch.tensor(
-            [[-10.4372, -5.7558, -8.6764], [-10.5410, -5.8704, -8.0590], [-10.6827, -6.3469, -8.3923]]
+            [
+                [-10.4371, -5.7565, -8.6765],
+                [-10.5413, -5.8700, -8.0589],
+                [-10.6824, -6.3477, -8.3927],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=2e-4, atol=2e-4)
 
         expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
         expected_slice_boxes = torch.tensor(
-            [[0.7733, 0.6576, 0.4496], [0.5171, 0.1184, 0.9094], [0.8846, 0.5647, 0.2486]]
+            [
+                [0.7733, 0.6576, 0.4496],
+                [0.5171, 0.1184, 0.9095],
+                [0.8846, 0.5647, 0.2486],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=2e-4, atol=2e-4)
 
         # verify postprocessing
         results = image_processor.post_process_object_detection(
             outputs, threshold=0.3, target_sizes=[image.size[::-1]]
         )[0]
-        expected_scores = torch.tensor([0.8330, 0.8313, 0.8039, 0.6829, 0.5355]).to(torch_device)
+        expected_scores = torch.tensor([0.8330, 0.8315, 0.8039, 0.6829, 0.5354]).to(torch_device)
         expected_labels = [75, 17, 17, 75, 63]
-        expected_slice_boxes = torch.tensor([38.3089, 72.1022, 177.6293, 118.4512]).to(torch_device)
+        expected_slice_boxes = torch.tensor([38.3109, 72.1002, 177.6301, 118.4511]).to(torch_device)
 
         self.assertEqual(len(results["scores"]), 5)
-        torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(results["scores"], expected_scores, rtol=2e-4, atol=2e-4)
         self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
         torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes)
diff --git a/tests/models/convnext/test_modeling_convnext.py b/tests/models/convnext/test_modeling_convnext.py
index fce8f4a35b4..65df028ce6e 100644
--- a/tests/models/convnext/test_modeling_convnext.py
+++ b/tests/models/convnext/test_modeling_convnext.py
@@ -286,9 +286,9 @@ class ConvNextModelIntegrationTest(unittest.TestCase):
         expected_shape = torch.Size((1, 1000))
         self.assertEqual(outputs.logits.shape, expected_shape)
 
-        expected_slice = torch.tensor([-0.0260, -0.4739, 0.1911]).to(torch_device)
+        expected_slice = torch.tensor([-0.0261, -0.4739, 0.1910]).to(torch_device)
 
-        torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
 
 
 @require_torch
diff --git a/tests/models/cvt/test_modeling_cvt.py b/tests/models/cvt/test_modeling_cvt.py
index cb7007bb6b1..f0b6b414335 100644
--- a/tests/models/cvt/test_modeling_cvt.py
+++ b/tests/models/cvt/test_modeling_cvt.py
@@ -185,6 +185,10 @@ class CvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_model_get_set_embeddings(self):
         pass
 
+    # Larger differences on A10 than T4
+    def test_batching_equivalence(self, atol=2e-4, rtol=2e-4):
+        super().test_batching_equivalence(atol=atol, rtol=rtol)
+
     def test_model(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
@@ -265,6 +269,6 @@ class CvtModelIntegrationTest(unittest.TestCase):
         expected_shape = torch.Size((1, 1000))
         self.assertEqual(outputs.logits.shape, expected_shape)
 
-        expected_slice = torch.tensor([0.9285, 0.9015, -0.3150]).to(torch_device)
+        expected_slice = torch.tensor([0.9287, 0.9016, -0.3152]).to(torch_device)
 
-        torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
diff --git a/tests/models/d_fine/test_modeling_d_fine.py b/tests/models/d_fine/test_modeling_d_fine.py
index 2b517572bb2..b26db579d0f 100644
--- a/tests/models/d_fine/test_modeling_d_fine.py
+++ b/tests/models/d_fine/test_modeling_d_fine.py
@@ -758,6 +758,7 @@ def prepare_img():
 
 @require_torch
 @require_vision
+@slow
 class DFineModelIntegrationTest(unittest.TestCase):
     @cached_property
     def default_image_processor(self):
@@ -778,37 +779,38 @@ class DFineModelIntegrationTest(unittest.TestCase):
 
         expected_logits = torch.tensor(
             [
-                [-3.8097816, -4.7724586, -5.994499],
-                [-5.2974715, -9.499067, -6.1653666],
-                [-5.3502765, -3.9530406, -6.3630295],
+                [-3.8221, -4.7679, -6.0063],
+                [-5.2994, -9.5009, -6.1697],
+                [-5.3103, -3.8005, -6.2972],
             ]
         ).to(torch_device)
 
         expected_boxes = torch.tensor(
             [
-                [0.7677696, 0.41479152, 0.46441072],
-                [0.16912134, 0.19869131, 0.2123824],
-                [0.2581653, 0.54818195, 0.47512347],
+                [0.7678, 0.4148, 0.4644],
+                [0.1691, 0.1987, 0.2124],
+                [0.2582, 0.5482, 0.4751],
             ]
         ).to(torch_device)
 
-        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, atol=1e-4, rtol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, atol=2e-4, rtol=2e-4)
 
         expected_shape_boxes = torch.Size((1, 300, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4, rtol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=2e-4, rtol=2e-4)
 
         # verify postprocessing
         results = image_processor.post_process_object_detection(
             outputs, threshold=0.0, target_sizes=[image.size[::-1]]
         )[0]
-        expected_scores = torch.tensor([0.9642, 0.9542, 0.9536, 0.8548], device=torch_device)
+
+        expected_scores = torch.tensor([0.9616, 0.9541, 0.9541, 0.8551], device=torch_device)
         expected_labels = [15, 65, 15, 57]
         expected_slice_boxes = torch.tensor(
             [
-                [1.3186283e01, 5.4130211e01, 3.1726535e02, 4.7212445e02],
-                [4.0275269e01, 7.2975174e01, 1.7620003e02, 1.1776848e02],
-                [3.4276117e02, 2.3427944e01, 6.3998401e02, 3.7477191e02],
-                [5.8418274e-01, 1.1794567e00, 6.3933154e02, 4.7485995e02],
+                [1.3358e01, 5.4123e01, 3.1726e02, 4.7222e02],
+                [4.0274e01, 7.2972e01, 1.7620e02, 1.1777e02],
+                [3.4270e02, 2.3427e01, 6.3998e02, 3.7476e02],
+                [5.7796e-01, 1.1773e00, 6.3933e02, 4.7486e02],
             ],
             device=torch_device,
         )
diff --git a/tests/models/dab_detr/test_modeling_dab_detr.py b/tests/models/dab_detr/test_modeling_dab_detr.py
index 8b4d8c139dc..126c9d7f693 100644
--- a/tests/models/dab_detr/test_modeling_dab_detr.py
+++ b/tests/models/dab_detr/test_modeling_dab_detr.py
@@ -787,7 +787,11 @@ class DabDetrModelIntegrationTests(unittest.TestCase):
         expected_shape = torch.Size((1, 300, 256))
         self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
         expected_slice = torch.tensor(
-            [[-0.4879, -0.2594, 0.4524], [-0.4997, -0.4258, 0.4329], [-0.8220, -0.4996, 0.0577]]
+            [
+                [-0.4878, -0.2593, 0.4521],
+                [-0.4999, -0.4257, 0.4326],
+                [-0.8220, -0.4997, 0.0578],
+            ]
         ).to(torch_device)
 
         torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, atol=2e-4, rtol=2e-4)
@@ -806,26 +810,34 @@ class DabDetrModelIntegrationTests(unittest.TestCase):
         expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels))
         self.assertEqual(outputs.logits.shape, expected_shape_logits)
         expected_slice_logits = torch.tensor(
-            [[-10.1765, -5.5243, -8.9324], [-9.8138, -5.6721, -7.5161], [-10.3054, -5.6081, -8.5931]]
+            [
+                [-10.1764, -5.5247, -8.9324],
+                [-9.8137, -5.6730, -7.5163],
+                [-10.3056, -5.6075, -8.5935],
+            ]
         ).to(torch_device)
         torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, atol=3e-4, rtol=3e-4)
 
         expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
         expected_slice_boxes = torch.tensor(
-            [[0.3708, 0.3000, 0.2753], [0.5211, 0.6125, 0.9495], [0.2897, 0.6730, 0.5459]]
+            [
+                [0.3708, 0.3000, 0.2754],
+                [0.5211, 0.6126, 0.9494],
+                [0.2897, 0.6731, 0.5460],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4, rtol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, atol=3e-4, rtol=3e-4)
 
         # verify postprocessing
         results = image_processor.post_process_object_detection(
             outputs, threshold=0.3, target_sizes=[image.size[::-1]]
         )[0]
-        expected_scores = torch.tensor([0.8732, 0.8563, 0.8554, 0.6079, 0.5896]).to(torch_device)
+        expected_scores = torch.tensor([0.8732, 0.8563, 0.8554, 0.6080, 0.5895]).to(torch_device)
         expected_labels = [17, 75, 17, 75, 63]
-        expected_boxes = torch.tensor([14.6970, 49.3892, 320.5165, 469.2765]).to(torch_device)
+        expected_boxes = torch.tensor([14.6931, 49.3886, 320.5176, 469.2762]).to(torch_device)
 
         self.assertEqual(len(results["scores"]), 5)
-        torch.testing.assert_close(results["scores"], expected_scores, atol=1e-4, rtol=1e-4)
+        torch.testing.assert_close(results["scores"], expected_scores, atol=3e-4, rtol=3e-4)
         self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
-        torch.testing.assert_close(results["boxes"][0, :], expected_boxes, atol=1e-4, rtol=1e-4)
+        torch.testing.assert_close(results["boxes"][0, :], expected_boxes, atol=3e-4, rtol=3e-4)
diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py
index 7052b74957d..fc30b10e142 100644
--- a/tests/models/deformable_detr/test_modeling_deformable_detr.py
+++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py
@@ -677,30 +677,38 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
         self.assertEqual(outputs.logits.shape, expected_shape_logits)
 
         expected_logits = torch.tensor(
-            [[-9.6645, -4.3449, -5.8705], [-9.7035, -3.8504, -5.0724], [-10.5634, -5.3379, -7.5116]]
+            [
+                [-9.6644, -4.3434, -5.8707],
+                [-9.7035, -3.8503, -5.0721],
+                [-10.5633, -5.3387, -7.5119],
+            ]
         ).to(torch_device)
         expected_boxes = torch.tensor(
-            [[0.8693, 0.2289, 0.2492], [0.3150, 0.5489, 0.5845], [0.5563, 0.7580, 0.8518]]
+            [
+                [0.8693, 0.2290, 0.2492],
+                [0.3150, 0.5489, 0.5845],
+                [0.5563, 0.7580, 0.8518],
+            ]
         ).to(torch_device)
 
-        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=2e-4, atol=2e-4)
 
         expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=2e-4, atol=2e-4)
 
         # verify postprocessing
         results = image_processor.post_process_object_detection(
             outputs, threshold=0.3, target_sizes=[image.size[::-1]]
         )[0]
-        expected_scores = torch.tensor([0.7999, 0.7894, 0.6331, 0.4720, 0.4382]).to(torch_device)
+        expected_scores = torch.tensor([0.7999, 0.7895, 0.6332, 0.4719, 0.4382]).to(torch_device)
         expected_labels = [17, 17, 75, 75, 63]
-        expected_slice_boxes = torch.tensor([16.5028, 52.8390, 318.2544, 470.7841]).to(torch_device)
+        expected_slice_boxes = torch.tensor([16.4960, 52.8387, 318.2565, 470.7831]).to(torch_device)
 
         self.assertEqual(len(results["scores"]), 5)
-        torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(results["scores"], expected_scores, rtol=2e-4, atol=2e-4)
         self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
-        torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes)
+        torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes, rtol=2e-4, atol=2e-4)
 
     def test_inference_object_detection_head_with_box_refine_two_stage(self):
         model = DeformableDetrForObjectDetection.from_pretrained(
@@ -720,17 +728,25 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
         self.assertEqual(outputs.logits.shape, expected_shape_logits)
 
         expected_logits = torch.tensor(
-            [[-6.7108, -4.3213, -6.3777], [-8.9014, -6.1799, -6.7240], [-6.9315, -4.4735, -6.2298]]
+            [
+                [-6.7112, -4.3216, -6.3781],
+                [-8.9035, -6.1738, -6.7249],
+                [-6.9314, -4.4736, -6.2303],
+            ]
         ).to(torch_device)
         expected_boxes = torch.tensor(
-            [[0.2583, 0.5499, 0.4683], [0.7652, 0.9068, 0.4882], [0.5490, 0.2763, 0.0564]]
+            [
+                [0.2582, 0.5499, 0.4683],
+                [0.7652, 0.9084, 0.4884],
+                [0.5490, 0.2763, 0.0564],
+            ]
         ).to(torch_device)
 
-        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=2e-4, atol=2e-4)
 
         expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=2e-4, atol=2e-4)
 
     @require_torch_accelerator
     def test_inference_object_detection_head_equivalence_cpu_accelerator(self):
@@ -753,10 +769,15 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
             gpu_outputs = model(pixel_values.to(torch_device), pixel_mask.to(torch_device))
 
         # 3. assert equivalence
+        # (on A10, the differences get larger than on T4)
         for key in cpu_outputs.keys():
-            assert torch.allclose(cpu_outputs[key], gpu_outputs[key].cpu(), atol=1e-4)
+            torch.testing.assert_close(cpu_outputs[key], gpu_outputs[key].cpu(), atol=2e-2, rtol=2e-2)
 
         expected_logits = torch.tensor(
-            [[-9.9051, -4.2541, -6.4852], [-9.6947, -4.0854, -6.8033], [-10.0665, -5.8470, -7.7003]]
+            [
+                [-9.9051, -4.2541, -6.4852],
+                [-9.6947, -4.0854, -6.8033],
+                [-10.0665, -5.8470, -7.7003],
+            ]
         )
-        assert torch.allclose(cpu_outputs.logits[0, :3, :3], expected_logits, atol=1e-4)
+        assert torch.allclose(cpu_outputs.logits[0, :3, :3], expected_logits, atol=2e-4)
diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py
index b626f74c5c5..2af2ca92115 100644
--- a/tests/models/detr/test_modeling_detr.py
+++ b/tests/models/detr/test_modeling_detr.py
@@ -586,9 +586,13 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
         expected_shape = torch.Size((1, 100, 256))
         assert outputs.last_hidden_state.shape == expected_shape
         expected_slice = torch.tensor(
-            [[0.0616, -0.5146, -0.4032], [-0.7629, -0.4934, -1.7153], [-0.4768, -0.6403, -0.7826]]
+            [
+                [0.0622, -0.5142, -0.4034],
+                [-0.7628, -0.4935, -1.7153],
+                [-0.4751, -0.6386, -0.7818],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
 
     def test_inference_object_detection_head(self):
         model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(torch_device)
@@ -606,16 +610,24 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
         expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels + 1))
         self.assertEqual(outputs.logits.shape, expected_shape_logits)
         expected_slice_logits = torch.tensor(
-            [[-19.1194, -0.0893, -11.0154], [-17.3640, -1.8035, -14.0219], [-20.0461, -0.5837, -11.1060]]
+            [
+                [-19.1211, -0.0881, -11.0188],
+                [-17.3641, -1.8045, -14.0229],
+                [-20.0415, -0.5833, -11.1005],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=2e-4, atol=2e-4)
 
         expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
         expected_slice_boxes = torch.tensor(
-            [[0.4433, 0.5302, 0.8853], [0.5494, 0.2517, 0.0529], [0.4998, 0.5360, 0.9956]]
+            [
+                [0.4433, 0.5302, 0.8852],
+                [0.5494, 0.2517, 0.0529],
+                [0.4998, 0.5360, 0.9955],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=2e-4, atol=2e-4)
 
         # verify postprocessing
         results = image_processor.post_process_object_detection(
@@ -623,12 +635,12 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
         )[0]
         expected_scores = torch.tensor([0.9982, 0.9960, 0.9955, 0.9988, 0.9987]).to(torch_device)
         expected_labels = [75, 75, 63, 17, 17]
-        expected_slice_boxes = torch.tensor([40.1633, 70.8115, 175.5471, 117.9841]).to(torch_device)
+        expected_slice_boxes = torch.tensor([40.1615, 70.8090, 175.5476, 117.9810]).to(torch_device)
 
         self.assertEqual(len(results["scores"]), 5)
-        torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(results["scores"], expected_scores, rtol=2e-4, atol=2e-4)
         self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
-        torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes)
+        torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes, rtol=2e-4, atol=2e-4)
 
     def test_inference_panoptic_segmentation_head(self):
         model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic").to(torch_device)
@@ -646,23 +658,27 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
         expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels + 1))
         self.assertEqual(outputs.logits.shape, expected_shape_logits)
         expected_slice_logits = torch.tensor(
-            [[-18.1565, -1.7568, -13.5029], [-16.8888, -1.4138, -14.1028], [-17.5709, -2.5080, -11.8654]]
+            [
+                [-18.1523, -1.7592, -13.5019],
+                [-16.8866, -1.4139, -14.1025],
+                [-17.5735, -2.5090, -11.8666],
+            ]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=2e-4, atol=2e-4)
 
         expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
         self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
         expected_slice_boxes = torch.tensor(
-            [[0.5344, 0.1789, 0.9285], [0.4420, 0.0572, 0.0875], [0.6630, 0.6887, 0.1017]]
+            [[0.5344, 0.1790, 0.9284], [0.4421, 0.0571, 0.0875], [0.6632, 0.6886, 0.1015]]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=1e-4, atol=1e-4)
+        torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=2e-4, atol=2e-4)
 
         expected_shape_masks = torch.Size((1, model.config.num_queries, 200, 267))
         self.assertEqual(outputs.pred_masks.shape, expected_shape_masks)
         expected_slice_masks = torch.tensor(
-            [[-7.7558, -10.8788, -11.9797], [-11.8881, -16.4329, -17.7451], [-14.7316, -19.7383, -20.3004]]
+            [[-7.8408, -11.0104, -12.1279], [-12.0299, -16.6498, -17.9806], [-14.8995, -19.9940, -20.5646]]
         ).to(torch_device)
-        torch.testing.assert_close(outputs.pred_masks[0, 0, :3, :3], expected_slice_masks, rtol=1e-3, atol=1e-3)
+        torch.testing.assert_close(outputs.pred_masks[0, 0, :3, :3], expected_slice_masks, rtol=2e-3, atol=2e-3)
 
         # verify postprocessing
         results = image_processor.post_process_panoptic_segmentation(
@@ -674,7 +690,7 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
             torch_device
         )
         expected_number_of_segments = 5
-        expected_first_segment = {"id": 1, "label_id": 17, "was_fused": False, "score": 0.994097}
+        expected_first_segment = {"id": 1, "label_id": 17, "was_fused": False, "score": 0.9941}
 
         number_of_unique_segments = len(torch.unique(results["segmentation"]))
         self.assertTrue(
@@ -716,6 +732,10 @@ class DetrModelIntegrationTests(unittest.TestCase):
         expected_shape = torch.Size((1, 100, 256))
         assert outputs.last_hidden_state.shape == expected_shape
         expected_slice = torch.tensor(
-            [[0.0616, -0.5146, -0.4032], [-0.7629, -0.4934, -1.7153], [-0.4768, -0.6403, -0.7826]]
+            [
+                [0.0622, -0.5142, -0.4034],
+                [-0.7628, -0.4935, -1.7153],
+                [-0.4751, -0.6386, -0.7818],
+            ]
         ).to(torch_device)
         torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)

From 8e87adc45f20ba88360afbc29ab3f7a0063bf720 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Tue, 1 Jul 2025 23:27:22 +0200
Subject: [PATCH 4/5] fix `llama` tests (#39161)

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh
---
 tests/models/llama/test_modeling_llama.py     | 30 +++++++------------
 tests/models/llama/test_tokenization_llama.py |  2 ++
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index 2e0e9126b1d..fcd060a37b8 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -25,6 +25,7 @@ from transformers.testing_utils import (
     require_read_token,
     require_torch,
     require_torch_accelerator,
+    run_test_using_subprocess,
     slow,
     torch_device,
 )
@@ -96,36 +97,28 @@ class LlamaModelTest(CausalLMModelTest, unittest.TestCase):
 
 
 @require_torch_accelerator
+@require_read_token
 class LlamaIntegrationTest(unittest.TestCase):
+    def setUp(self):
+        cleanup(torch_device, gc_collect=True)
+
     def tearDown(self):
         # TODO (joao): automatic compilation, i.e. compilation when `cache_implementation="static"` is used, leaves
         # some memory allocated in the cache, which means some object is not being released properly. This causes some
         # unoptimal memory usage, e.g. after certain tests a 7B model in FP16 no longer fits in a 24GB GPU.
         # Investigate the root cause.
-        cleanup(torch_device, gc_collect=False)
+        cleanup(torch_device, gc_collect=True)
 
     @slow
-    @require_read_token
     def test_llama_3_1_hard(self):
         """
         An integration test for llama 3.1. It tests against a long output to ensure the subtle numerical differences
         from llama 3.1.'s RoPE can be detected
         """
-        # diff on `EXPECTED_TEXT`:
-        # 2024-08-26: updating from torch 2.3.1 to 2.4.0 slightly changes the results.
-        expected_base_text = (
-            "Tell me about the french revolution. The french revolution was a period of radical political and social "
-            "upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked "
-            "by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the "
-            "First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative "
-            "assembly that had not met since 1614. The Third Estate, which represented the common people, "
-            "demanded greater representation and eventually broke away to form the National Assembly. This marked "
-            "the beginning of the end of the absolute monarchy and the rise of the middle class.\n"
-        )
         expected_texts = Expectations(
             {
-                ("rocm", (9, 5)): expected_base_text.replace("political and social", "social and political"),
-                ("cuda", None): expected_base_text,
+                ("rocm", (9, 5)): 'Tell me about the french revolution. The french revolution was a period of radical social and political upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative assembly that had not met since 1614. The Third Estate, which represented the common people, demanded greater representation and eventually broke away to form the National Assembly. This marked the beginning of the end of the absolute monarchy and the rise of the middle class.\n',
+                ("cuda", None): 'Tell me about the french revolution. The french revolution was a period of radical political and social upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative assembly that had not met since 1614. The Third Estate, which represented the common people, demanded greater representation and eventually broke away to form the National Assembly. The National Assembly adopted the Declaration of the Rights of Man and of the Citizen, which enshr',
             }
         )  # fmt: skip
         EXPECTED_TEXT = expected_texts.get_expectation()
@@ -142,7 +135,6 @@ class LlamaIntegrationTest(unittest.TestCase):
         self.assertEqual(generated_text, EXPECTED_TEXT)
 
     @slow
-    @require_read_token
     def test_model_7b_logits_bf16(self):
         input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
 
@@ -191,7 +183,6 @@ class LlamaIntegrationTest(unittest.TestCase):
         )
 
     @slow
-    @require_read_token
     def test_model_7b_logits(self):
         input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
 
@@ -240,6 +231,9 @@ class LlamaIntegrationTest(unittest.TestCase):
             )
         )
 
+    # TODO: check why we have the following strange situation.
+    # without running in subprocess, this test causes subsequent tests failing with `RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0!`
+    @run_test_using_subprocess
     @slow
     def test_model_7b_dola_generation(self):
         # ground truth text generated with dola_layers="low", repetition_penalty=1.2
@@ -265,7 +259,6 @@ class LlamaIntegrationTest(unittest.TestCase):
 
     @slow
     @require_torch_accelerator
-    @require_read_token
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
         # work as intended. See https://github.com/pytorch/pytorch/issues/121943
@@ -306,7 +299,6 @@ class LlamaIntegrationTest(unittest.TestCase):
         self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text)
 
     @slow
-    @require_read_token
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
             self.skipTest(reason="This test requires torch >= 2.4 to run.")
diff --git a/tests/models/llama/test_tokenization_llama.py b/tests/models/llama/test_tokenization_llama.py
index aa2cf161036..927aa54fa08 100644
--- a/tests/models/llama/test_tokenization_llama.py
+++ b/tests/models/llama/test_tokenization_llama.py
@@ -407,6 +407,8 @@ class LlamaIntegrationTest(unittest.TestCase):
         self.tokenizer.add_eos_token = False
         self.rust_tokenizer.add_eos_token = False
 
+    # See internal discussion: https://huggingface.slack.com/archives/C01NE71C4F7/p1750680376085749?thread_ts=1750676268.233309&cid=C01NE71C4F7
+    @unittest.skip("failing, won't fix")
     @slow
     def test_conversion(self):
         # This is excruciatingly slow since it has to recreate the entire merge

From e8e0c76162263840661fc0ca0da3952861754759 Mon Sep 17 00:00:00 2001
From: Chong You
Date: Tue, 1 Jul 2025 22:11:03 -0400
Subject: [PATCH 5/5] Add activation sparsity reference in gemma3n doc (#39160)

Add activation sparsity reference in the description of gemma3n
---
 docs/source/en/model_doc/gemma3n.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/en/model_doc/gemma3n.md b/docs/source/en/model_doc/gemma3n.md
index d38368e8290..423261da04a 100644
--- a/docs/source/en/model_doc/gemma3n.md
+++ b/docs/source/en/model_doc/gemma3n.md
@@ -29,7 +29,7 @@ rendered properly in your Markdown viewer.
 
 Gemma3n is a multimodal model with pretrained and instruction-tuned variants, available in E4B and E2B sizes. While
 large portions of the language model architecture are shared with prior Gemma releases, there are many new additions
 in this model, including [Alternating Updates][altup] (AltUp), [Learned Augmented Residual Layer][laurel] (LAuReL),
-[MatFormer][matformer], Per-Layer Embeddings (PLE), activation sparsity, and KV cache sharing. The language model uses
+[MatFormer][matformer], Per-Layer Embeddings (PLE), [Activation Sparsity with Statistical Top-k][spark-transformer], and KV cache sharing. The language model uses
 a similar attention pattern to [Gemma 3](./gemma3.md) with alternating 4 local sliding window self-attention layers
 for every global self-attention layer with a maximum context length of 32k tokens. Gemma 3n introduces
 [MobileNet v5][mobilenetv5] as the vision encoder, using a default resolution of 768x768 pixels, and adds a newly
@@ -201,4 +201,5 @@ echo -e "Plants create energy through a process known as" | transformers run --t
 [gemma3n-collection]: https://huggingface.co/collections/google/gemma-3n
 [laurel]: https://arxiv.org/abs/2411.07501
 [matformer]: https://arxiv.org/abs/2310.07707
+[spark-transformer]: https://arxiv.org/abs/2506.06644
 [usm]: https://arxiv.org/abs/2303.01037