Merge branch 'main' into fix-eomt-for-pipeline

commit 54bb1ac314, authored by Yaswanth Gali on 2025-07-02 15:05:54 +05:30, committed via GitHub
16 changed files with 652 additions and 86 deletions

.github/workflows/get-pr-info.yml (new file, 157 lines)

@@ -0,0 +1,157 @@
name: Get PR commit SHA

on:
  workflow_call:
    inputs:
      pr_number:
        required: true
        type: string
    outputs:
      PR_HEAD_REPO_FULL_NAME:
        description: "The full name of the repository from which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
      PR_BASE_REPO_FULL_NAME:
        description: "The full name of the repository to which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
      PR_HEAD_REPO_OWNER:
        description: "The owner of the repository from which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
      PR_BASE_REPO_OWNER:
        description: "The owner of the repository to which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
      PR_HEAD_REPO_NAME:
        description: "The name of the repository from which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
      PR_BASE_REPO_NAME:
        description: "The name of the repository to which the pull request is created"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
      PR_HEAD_REF:
        description: "The branch name of the pull request in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
      PR_BASE_REF:
        description: "The branch name in the base repository (to merge into)"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
      PR_HEAD_SHA:
        description: "The head sha of the pull request branch in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
      PR_BASE_SHA:
        description: "The head sha of the target branch in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
      PR_MERGE_COMMIT_SHA:
        description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
      PR_HEAD_COMMIT_DATE:
        description: "The date of the head sha of the pull request branch in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
      PR_MERGE_COMMIT_DATE:
        description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
      PR_HEAD_COMMIT_TIMESTAMP:
        description: "The timestamp of the head sha of the pull request branch in the head repository"
        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
      PR_MERGE_COMMIT_TIMESTAMP:
        description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
      PR:
        description: "The PR"
        value: ${{ jobs.get-pr-info.outputs.PR }}
      PR_FILES:
        description: "The files touched in the PR"
        value: ${{ jobs.get-pr-info.outputs.PR_FILES }}

jobs:
  get-pr-info:
    runs-on: ubuntu-22.04
    name: Get PR commit SHA better
    outputs:
      PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
      PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
      PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
      PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
      PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
      PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
      PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
      PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
      PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
      PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
      PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
      PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
      PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
      PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
      PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
      PR: ${{ steps.pr_info.outputs.pr }}
      PR_FILES: ${{ steps.pr_info.outputs.files }}
    if: ${{ inputs.pr_number != '' }}
    steps:
      - name: Extract PR details
        id: pr_info
        uses: actions/github-script@v6
        with:
          script: |
            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: ${{ inputs.pr_number }}
            });
            const { data: head_commit } = await github.rest.repos.getCommit({
              owner: pr.head.repo.owner.login,
              repo: pr.head.repo.name,
              ref: pr.head.ref
            });
            const { data: merge_commit } = await github.rest.repos.getCommit({
              owner: pr.base.repo.owner.login,
              repo: pr.base.repo.name,
              ref: pr.merge_commit_sha,
            });
            const { data: files } = await github.rest.pulls.listFiles({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: ${{ inputs.pr_number }}
            });
            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
            core.setOutput('base_repo_full_name', pr.base.repo.full_name);
            core.setOutput('head_repo_owner', pr.head.repo.owner.login);
            core.setOutput('base_repo_owner', pr.base.repo.owner.login);
            core.setOutput('head_repo_name', pr.head.repo.name);
            core.setOutput('base_repo_name', pr.base.repo.name);
            core.setOutput('head_ref', pr.head.ref);
            core.setOutput('base_ref', pr.base.ref);
            core.setOutput('head_sha', pr.head.sha);
            core.setOutput('base_sha', pr.base.sha);
            core.setOutput('merge_commit_sha', pr.merge_commit_sha);
            core.setOutput('pr', pr);
            core.setOutput('head_commit_date', head_commit.commit.committer.date);
            core.setOutput('merge_commit_date', merge_commit.commit.committer.date);
            core.setOutput('files', files);
            console.log('PR head commit:', {
              head_commit: head_commit,
              commit: head_commit.commit,
              date: head_commit.commit.committer.date
            });
            console.log('PR merge commit:', {
              merge_commit: merge_commit,
              commit: merge_commit.commit,
              date: merge_commit.commit.committer.date
            });
      - name: Convert dates to timestamps
        id: get_timestamps
        run: |
          head_commit_date=${{ steps.pr_info.outputs.head_commit_date }}
          merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }}
          echo $head_commit_date
          echo $merge_commit_date
          head_commit_timestamp=$(date -d "$head_commit_date" +%s)
          merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
          echo $head_commit_timestamp
          echo $merge_commit_timestamp
          echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
          echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT

.github/workflows/get-pr-number.yml (new file, 36 lines)

@@ -0,0 +1,36 @@
name: Get PR number

on:
  workflow_call:
    outputs:
      PR_NUMBER:
        description: "The extracted PR number"
        value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}

jobs:
  get-pr-number:
    runs-on: ubuntu-22.04
    name: Get PR number
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
      - name: Get PR number
        shell: bash
        run: |
          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
          elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
          elif [[ "${{ github.event.pull_request }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
          else
            echo "PR_NUMBER=" >> $GITHUB_ENV
          fi
      - name: Check PR number
        shell: bash
        run: |
          echo "${{ env.PR_NUMBER }}"
      - name: Set PR number
        id: set_pr_number
        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"

.github/workflows/pr_run_slow_ci.yml (new file, 163 lines)

@@ -0,0 +1,163 @@
name: PR slow CI

on:
  pull_request_target:
    types: [opened, synchronize, reopened]

jobs:
  get-pr-number:
    name: Get PR number
    uses: ./.github/workflows/get-pr-number.yml

  get-pr-info:
    name: Get PR commit SHA
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
    uses: ./.github/workflows/get-pr-info.yml
    with:
      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}

  # We only need to verify the timestamp if the workflow is triggered by `issue_comment`.
  verify_pr_commit:
    name: Verify that the PR commit corresponds to a specific event by comparing timestamps
    if: ${{ github.event.comment.created_at != '' }}
    runs-on: ubuntu-22.04
    needs: get-pr-info
    env:
      COMMENT_DATE: ${{ github.event.comment.created_at }}
      PR_MERGE_COMMIT_DATE: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
      PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
    steps:
      - run: |
          COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
          echo "COMMENT_DATE: $COMMENT_DATE"
          echo "PR_MERGE_COMMIT_DATE: $PR_MERGE_COMMIT_DATE"
          echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
          echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
          if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
            echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
            exit 1;
          fi

  get-jobs:
    name: Get test files to run
    runs-on: ubuntu-22.04
    needs: [get-pr-number, get-pr-info]
    outputs:
      jobs: ${{ steps.get_jobs.outputs.jobs_to_run }}
    steps:
      - name: Get repository content
        id: repo_content
        uses: actions/github-script@v6
        with:
          script: |
            const { data: tests_dir } = await github.rest.repos.getContent({
              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
              path: 'tests',
              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
            });
            const { data: tests_models_dir } = await github.rest.repos.getContent({
              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
              path: 'tests/models',
              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
            });
            const { data: tests_quantization_dir } = await github.rest.repos.getContent({
              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
              path: 'tests/quantization',
              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
            });
            core.setOutput('tests_dir', tests_dir);
            core.setOutput('tests_models_dir', tests_models_dir);
            core.setOutput('tests_quantization_dir', tests_quantization_dir);
      # This checkout is to the main branch
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"
      - name: Write pr_files file
        run: |
          cat > pr_files.txt << 'EOF'
          ${{ needs.get-pr-info.outputs.PR_FILES }}
          EOF
      - name: Write tests_dir file
        run: |
          cat > tests_dir.txt << 'EOF'
          ${{ steps.repo_content.outputs.tests_dir }}
          EOF
      - name: Write tests_models_dir file
        run: |
          cat > tests_models_dir.txt << 'EOF'
          ${{ steps.repo_content.outputs.tests_models_dir }}
          EOF
      - name: Write tests_quantization_dir file
        run: |
          cat > tests_quantization_dir.txt << 'EOF'
          ${{ steps.repo_content.outputs.tests_quantization_dir }}
          EOF
      - name: Run script to get jobs to run
        id: get_jobs
        run: |
          python utils/get_pr_run_slow_jobs.py | tee output.txt
          echo "jobs_to_run: $(tail -n 1 output.txt)"
          echo "jobs_to_run=$(tail -n 1 output.txt)" >> $GITHUB_OUTPUT

  send_comment:
    name: Send a comment to suggest jobs to run
    if: ${{ needs.get-jobs.outputs.jobs != '' }}
    needs: [get-pr-number, get-jobs]
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04
    steps:
      - name: Delete existing comment and send new one
        uses: actions/github-script@v7
        env:
          BODY: "\n\nrun-slow: ${{ needs.get-jobs.outputs.jobs }}"
        with:
          script: |
            const prNumber = ${{ needs.get-pr-number.outputs.PR_NUMBER }};
            const commentPrefix = "**[For maintainers]** Suggested jobs to run (before merge)";
            // Get all comments on the PR
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber
            });
            // Find existing comment(s) that start with our prefix
            const existingComments = comments.filter(comment =>
              comment.user.login === 'github-actions[bot]' &&
              comment.body.startsWith(commentPrefix)
            );
            // Delete existing comment(s)
            for (const comment of existingComments) {
              console.log(`Deleting existing comment #${comment.id}`);
              await github.rest.issues.deleteComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: comment.id
              });
            }
            // Create new comment
            const newBody = `${commentPrefix}${process.env.BODY}`;
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              body: newBody
            });
            console.log('✅ Comment updated successfully');
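The `verify_pr_commit` job encodes a simple safety rule for comment-triggered runs: the `run-slow` comment must be strictly newer than the PR's current merge commit, so a push made after the comment can never run under an earlier approval. The same check as a Python sketch (both inputs are Unix timestamps, as produced by the workflows above):

def comment_authorizes_run(comment_timestamp: int, merge_commit_timestamp: int) -> bool:
    # The comment must postdate the merge commit; otherwise the code under
    # test may have changed after the comment was written.
    return comment_timestamp > merge_commit_timestamp

assert comment_authorizes_run(1_751_450_000, 1_751_440_000)      # comment after last push: run
assert not comment_authorizes_run(1_751_440_000, 1_751_450_000)  # push after comment: abort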

@@ -29,7 +29,7 @@ rendered properly in your Markdown viewer.
Gemma3n is a multimodal model with pretrained and instruction-tuned variants, available in E4B and E2B sizes. While
large portions of the language model architecture are shared with prior Gemma releases, there are many new additions in
this model, including [Alternating Updates][altup] (AltUp), [Learned Augmented Residual Layer][laurel] (LAuReL),
[MatFormer][matformer], Per-Layer Embeddings (PLE), activation sparsity, and KV cache sharing. The language model uses
[MatFormer][matformer], Per-Layer Embeddings (PLE), [Activation Sparsity with Statistical Top-k][spark-transformer], and KV cache sharing. The language model uses
a similar attention pattern to [Gemma 3](./gemma3.md) with alternating 4 local sliding window self-attention layers for
every global self-attention layer with a maximum context length of 32k tokens. Gemma 3n introduces
[MobileNet v5][mobilenetv5] as the vision encoder, using a default resolution of 768x768 pixels, and adds a newly
@@ -201,4 +201,5 @@ echo -e "Plants create energy through a process known as" | transformers run --t
[gemma3n-collection]: https://huggingface.co/collections/google/gemma-3n
[laurel]: https://arxiv.org/abs/2411.07501
[matformer]: https://arxiv.org/abs/2310.07707
[spark-transformer]: https://arxiv.org/abs/2506.06644
[usm]: https://arxiv.org/abs/2303.01037

@@ -570,9 +570,14 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
expected_shape = torch.Size((1, 300, 256))
self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
expected_slice = torch.tensor(
[[0.4222, 0.7471, 0.8760], [0.6395, -0.2729, 0.7127], [-0.3090, 0.7642, 0.9529]]
[
[0.4223, 0.7474, 0.8760],
[0.6397, -0.2727, 0.7126],
[-0.3089, 0.7643, 0.9529],
]
).to(torch_device)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
def test_inference_object_detection_head(self):
model = ConditionalDetrForObjectDetection.from_pretrained("microsoft/conditional-detr-resnet-50").to(
@@ -592,26 +597,34 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_slice_logits = torch.tensor(
[[-10.4372, -5.7558, -8.6764], [-10.5410, -5.8704, -8.0590], [-10.6827, -6.3469, -8.3923]]
[
[-10.4371, -5.7565, -8.6765],
[-10.5413, -5.8700, -8.0589],
[-10.6824, -6.3477, -8.3927],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=2e-4, atol=2e-4)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
expected_slice_boxes = torch.tensor(
[[0.7733, 0.6576, 0.4496], [0.5171, 0.1184, 0.9094], [0.8846, 0.5647, 0.2486]]
[
[0.7733, 0.6576, 0.4496],
[0.5171, 0.1184, 0.9095],
[0.8846, 0.5647, 0.2486],
]
).to(torch_device)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=2e-4, atol=2e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.8330, 0.8313, 0.8039, 0.6829, 0.5355]).to(torch_device)
expected_scores = torch.tensor([0.8330, 0.8315, 0.8039, 0.6829, 0.5354]).to(torch_device)
expected_labels = [75, 17, 17, 75, 63]
expected_slice_boxes = torch.tensor([38.3089, 72.1022, 177.6293, 118.4512]).to(torch_device)
expected_slice_boxes = torch.tensor([38.3109, 72.1002, 177.6301, 118.4511]).to(torch_device)
self.assertEqual(len(results["scores"]), 5)
torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(results["scores"], expected_scores, rtol=2e-4, atol=2e-4)
self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes)

@@ -286,9 +286,9 @@ class ConvNextModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-0.0260, -0.4739, 0.1911]).to(torch_device)
expected_slice = torch.tensor([-0.0261, -0.4739, 0.1910]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@require_torch

@@ -185,6 +185,10 @@ class CvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_model_get_set_embeddings(self):
pass
# Larger differences on A10 than T4
def test_batching_equivalence(self, atol=2e-4, rtol=2e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@@ -265,6 +269,6 @@ class CvtModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([0.9285, 0.9015, -0.3150]).to(torch_device)
expected_slice = torch.tensor([0.9287, 0.9016, -0.3152]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)

@@ -758,6 +758,7 @@ def prepare_img():
@require_torch
@require_vision
@slow
class DFineModelIntegrationTest(unittest.TestCase):
@cached_property
def default_image_processor(self):
@@ -778,37 +779,38 @@ class DFineModelIntegrationTest(unittest.TestCase):
expected_logits = torch.tensor(
[
[-3.8097816, -4.7724586, -5.994499],
[-5.2974715, -9.499067, -6.1653666],
[-5.3502765, -3.9530406, -6.3630295],
[-3.8221, -4.7679, -6.0063],
[-5.2994, -9.5009, -6.1697],
[-5.3103, -3.8005, -6.2972],
]
).to(torch_device)
expected_boxes = torch.tensor(
[
[0.7677696, 0.41479152, 0.46441072],
[0.16912134, 0.19869131, 0.2123824],
[0.2581653, 0.54818195, 0.47512347],
[0.7678, 0.4148, 0.4644],
[0.1691, 0.1987, 0.2124],
[0.2582, 0.5482, 0.4751],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, atol=1e-4, rtol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, atol=2e-4, rtol=2e-4)
expected_shape_boxes = torch.Size((1, 300, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4, rtol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=2e-4, rtol=2e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.0, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.9642, 0.9542, 0.9536, 0.8548], device=torch_device)
expected_scores = torch.tensor([0.9616, 0.9541, 0.9541, 0.8551], device=torch_device)
expected_labels = [15, 65, 15, 57]
expected_slice_boxes = torch.tensor(
[
[1.3186283e01, 5.4130211e01, 3.1726535e02, 4.7212445e02],
[4.0275269e01, 7.2975174e01, 1.7620003e02, 1.1776848e02],
[3.4276117e02, 2.3427944e01, 6.3998401e02, 3.7477191e02],
[5.8418274e-01, 1.1794567e00, 6.3933154e02, 4.7485995e02],
[1.3358e01, 5.4123e01, 3.1726e02, 4.7222e02],
[4.0274e01, 7.2972e01, 1.7620e02, 1.1777e02],
[3.4270e02, 2.3427e01, 6.3998e02, 3.7476e02],
[5.7796e-01, 1.1773e00, 6.3933e02, 4.7486e02],
],
device=torch_device,
)

@@ -787,7 +787,11 @@ class DabDetrModelIntegrationTests(unittest.TestCase):
expected_shape = torch.Size((1, 300, 256))
self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
expected_slice = torch.tensor(
[[-0.4879, -0.2594, 0.4524], [-0.4997, -0.4258, 0.4329], [-0.8220, -0.4996, 0.0577]]
[
[-0.4878, -0.2593, 0.4521],
[-0.4999, -0.4257, 0.4326],
[-0.8220, -0.4997, 0.0578],
]
).to(torch_device)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, atol=2e-4, rtol=2e-4)
@@ -806,26 +810,34 @@ class DabDetrModelIntegrationTests(unittest.TestCase):
expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_slice_logits = torch.tensor(
[[-10.1765, -5.5243, -8.9324], [-9.8138, -5.6721, -7.5161], [-10.3054, -5.6081, -8.5931]]
[
[-10.1764, -5.5247, -8.9324],
[-9.8137, -5.6730, -7.5163],
[-10.3056, -5.6075, -8.5935],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, atol=3e-4, rtol=3e-4)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
expected_slice_boxes = torch.tensor(
[[0.3708, 0.3000, 0.2753], [0.5211, 0.6125, 0.9495], [0.2897, 0.6730, 0.5459]]
[
[0.3708, 0.3000, 0.2754],
[0.5211, 0.6126, 0.9494],
[0.2897, 0.6731, 0.5460],
]
).to(torch_device)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4, rtol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, atol=3e-4, rtol=3e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.8732, 0.8563, 0.8554, 0.6079, 0.5896]).to(torch_device)
expected_scores = torch.tensor([0.8732, 0.8563, 0.8554, 0.6080, 0.5895]).to(torch_device)
expected_labels = [17, 75, 17, 75, 63]
expected_boxes = torch.tensor([14.6970, 49.3892, 320.5165, 469.2765]).to(torch_device)
expected_boxes = torch.tensor([14.6931, 49.3886, 320.5176, 469.2762]).to(torch_device)
self.assertEqual(len(results["scores"]), 5)
torch.testing.assert_close(results["scores"], expected_scores, atol=1e-4, rtol=1e-4)
torch.testing.assert_close(results["scores"], expected_scores, atol=3e-4, rtol=3e-4)
self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][0, :], expected_boxes, atol=1e-4, rtol=1e-4)
torch.testing.assert_close(results["boxes"][0, :], expected_boxes, atol=3e-4, rtol=3e-4)

@@ -677,30 +677,38 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_logits = torch.tensor(
[[-9.6645, -4.3449, -5.8705], [-9.7035, -3.8504, -5.0724], [-10.5634, -5.3379, -7.5116]]
[
[-9.6644, -4.3434, -5.8707],
[-9.7035, -3.8503, -5.0721],
[-10.5633, -5.3387, -7.5119],
]
).to(torch_device)
expected_boxes = torch.tensor(
[[0.8693, 0.2289, 0.2492], [0.3150, 0.5489, 0.5845], [0.5563, 0.7580, 0.8518]]
[
[0.8693, 0.2290, 0.2492],
[0.3150, 0.5489, 0.5845],
[0.5563, 0.7580, 0.8518],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=2e-4, atol=2e-4)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=2e-4, atol=2e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.7999, 0.7894, 0.6331, 0.4720, 0.4382]).to(torch_device)
expected_scores = torch.tensor([0.7999, 0.7895, 0.6332, 0.4719, 0.4382]).to(torch_device)
expected_labels = [17, 17, 75, 75, 63]
expected_slice_boxes = torch.tensor([16.5028, 52.8390, 318.2544, 470.7841]).to(torch_device)
expected_slice_boxes = torch.tensor([16.4960, 52.8387, 318.2565, 470.7831]).to(torch_device)
self.assertEqual(len(results["scores"]), 5)
torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(results["scores"], expected_scores, rtol=2e-4, atol=2e-4)
self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes)
torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes, rtol=2e-4, atol=2e-4)
def test_inference_object_detection_head_with_box_refine_two_stage(self):
model = DeformableDetrForObjectDetection.from_pretrained(
@@ -720,17 +728,25 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_logits = torch.tensor(
[[-6.7108, -4.3213, -6.3777], [-8.9014, -6.1799, -6.7240], [-6.9315, -4.4735, -6.2298]]
[
[-6.7112, -4.3216, -6.3781],
[-8.9035, -6.1738, -6.7249],
[-6.9314, -4.4736, -6.2303],
]
).to(torch_device)
expected_boxes = torch.tensor(
[[0.2583, 0.5499, 0.4683], [0.7652, 0.9068, 0.4882], [0.5490, 0.2763, 0.0564]]
[
[0.2582, 0.5499, 0.4683],
[0.7652, 0.9084, 0.4884],
[0.5490, 0.2763, 0.0564],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=2e-4, atol=2e-4)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=2e-4, atol=2e-4)
@require_torch_accelerator
def test_inference_object_detection_head_equivalence_cpu_accelerator(self):
@@ -753,10 +769,15 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
gpu_outputs = model(pixel_values.to(torch_device), pixel_mask.to(torch_device))
# 3. assert equivalence
# (on A10, the differences get larger than on T4)
for key in cpu_outputs.keys():
assert torch.allclose(cpu_outputs[key], gpu_outputs[key].cpu(), atol=1e-4)
torch.testing.assert_close(cpu_outputs[key], gpu_outputs[key].cpu(), atol=2e-2, rtol=2e-2)
expected_logits = torch.tensor(
[[-9.9051, -4.2541, -6.4852], [-9.6947, -4.0854, -6.8033], [-10.0665, -5.8470, -7.7003]]
[
[-9.9051, -4.2541, -6.4852],
[-9.6947, -4.0854, -6.8033],
[-10.0665, -5.8470, -7.7003],
]
)
assert torch.allclose(cpu_outputs.logits[0, :3, :3], expected_logits, atol=1e-4)
assert torch.allclose(cpu_outputs.logits[0, :3, :3], expected_logits, atol=2e-4)
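Note the swap above from bare `assert torch.allclose(...)` to `torch.testing.assert_close(...)`. Both apply the same elementwise tolerance rule, but `allclose` only returns a bool, while `assert_close` raises an AssertionError that reports how many elements mismatched and the greatest absolute and relative differences, which is far more useful in CI logs. A quick illustration (values are made up):

import torch

a = torch.tensor([1.0000, 2.0000])
b = torch.tensor([1.0005, 2.0000])

print(torch.allclose(a, b, atol=1e-4))  # False, with no further detail

try:
    torch.testing.assert_close(a, b, rtol=0.0, atol=1e-4)
except AssertionError as e:
    print(e)  # reports the mismatch count and the greatest abs/rel differences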

@@ -586,9 +586,13 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
expected_shape = torch.Size((1, 100, 256))
assert outputs.last_hidden_state.shape == expected_shape
expected_slice = torch.tensor(
[[0.0616, -0.5146, -0.4032], [-0.7629, -0.4934, -1.7153], [-0.4768, -0.6403, -0.7826]]
[
[0.0622, -0.5142, -0.4034],
[-0.7628, -0.4935, -1.7153],
[-0.4751, -0.6386, -0.7818],
]
).to(torch_device)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
def test_inference_object_detection_head(self):
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(torch_device)
@@ -606,16 +610,24 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels + 1))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_slice_logits = torch.tensor(
[[-19.1194, -0.0893, -11.0154], [-17.3640, -1.8035, -14.0219], [-20.0461, -0.5837, -11.1060]]
[
[-19.1211, -0.0881, -11.0188],
[-17.3641, -1.8045, -14.0229],
[-20.0415, -0.5833, -11.1005],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=2e-4, atol=2e-4)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
expected_slice_boxes = torch.tensor(
[[0.4433, 0.5302, 0.8853], [0.5494, 0.2517, 0.0529], [0.4998, 0.5360, 0.9956]]
[
[0.4433, 0.5302, 0.8852],
[0.5494, 0.2517, 0.0529],
[0.4998, 0.5360, 0.9955],
]
).to(torch_device)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=2e-4, atol=2e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
@@ -623,12 +635,12 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
)[0]
expected_scores = torch.tensor([0.9982, 0.9960, 0.9955, 0.9988, 0.9987]).to(torch_device)
expected_labels = [75, 75, 63, 17, 17]
expected_slice_boxes = torch.tensor([40.1633, 70.8115, 175.5471, 117.9841]).to(torch_device)
expected_slice_boxes = torch.tensor([40.1615, 70.8090, 175.5476, 117.9810]).to(torch_device)
self.assertEqual(len(results["scores"]), 5)
torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(results["scores"], expected_scores, rtol=2e-4, atol=2e-4)
self.assertSequenceEqual(results["labels"].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes)
torch.testing.assert_close(results["boxes"][0, :], expected_slice_boxes, rtol=2e-4, atol=2e-4)
def test_inference_panoptic_segmentation_head(self):
model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic").to(torch_device)
@@ -646,23 +658,27 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.num_labels + 1))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_slice_logits = torch.tensor(
[[-18.1565, -1.7568, -13.5029], [-16.8888, -1.4138, -14.1028], [-17.5709, -2.5080, -11.8654]]
[
[-18.1523, -1.7592, -13.5019],
[-16.8866, -1.4139, -14.1025],
[-17.5735, -2.5090, -11.8666],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_slice_logits, rtol=2e-4, atol=2e-4)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
expected_slice_boxes = torch.tensor(
[[0.5344, 0.1789, 0.9285], [0.4420, 0.0572, 0.0875], [0.6630, 0.6887, 0.1017]]
[[0.5344, 0.1790, 0.9284], [0.4421, 0.0571, 0.0875], [0.6632, 0.6886, 0.1015]]
).to(torch_device)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, rtol=2e-4, atol=2e-4)
expected_shape_masks = torch.Size((1, model.config.num_queries, 200, 267))
self.assertEqual(outputs.pred_masks.shape, expected_shape_masks)
expected_slice_masks = torch.tensor(
[[-7.7558, -10.8788, -11.9797], [-11.8881, -16.4329, -17.7451], [-14.7316, -19.7383, -20.3004]]
[[-7.8408, -11.0104, -12.1279], [-12.0299, -16.6498, -17.9806], [-14.8995, -19.9940, -20.5646]]
).to(torch_device)
torch.testing.assert_close(outputs.pred_masks[0, 0, :3, :3], expected_slice_masks, rtol=1e-3, atol=1e-3)
torch.testing.assert_close(outputs.pred_masks[0, 0, :3, :3], expected_slice_masks, rtol=2e-3, atol=2e-3)
# verify postprocessing
results = image_processor.post_process_panoptic_segmentation(
@@ -674,7 +690,7 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
torch_device
)
expected_number_of_segments = 5
expected_first_segment = {"id": 1, "label_id": 17, "was_fused": False, "score": 0.994097}
expected_first_segment = {"id": 1, "label_id": 17, "was_fused": False, "score": 0.9941}
number_of_unique_segments = len(torch.unique(results["segmentation"]))
self.assertTrue(
@@ -716,6 +732,10 @@ class DetrModelIntegrationTests(unittest.TestCase):
expected_shape = torch.Size((1, 100, 256))
assert outputs.last_hidden_state.shape == expected_shape
expected_slice = torch.tensor(
[[0.0616, -0.5146, -0.4032], [-0.7629, -0.4934, -1.7153], [-0.4768, -0.6403, -0.7826]]
[
[0.0622, -0.5142, -0.4034],
[-0.7628, -0.4935, -1.7153],
[-0.4751, -0.6386, -0.7818],
]
).to(torch_device)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
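All of these detection tests post-process the raw model outputs with `post_process_object_detection`, which turns logits and normalized boxes into per-image scores, labels, and absolute pixel boxes. Note the `target_sizes=[image.size[::-1]]` idiom: PIL reports size as (width, height) while the processor expects (height, width). A hedged sketch of the call shape (checkpoint, input file, and threshold are illustrative):

import torch
from PIL import Image
from transformers import AutoImageProcessor, DetrForObjectDetection

image = Image.open("cats.png")  # illustrative input image
processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

with torch.no_grad():
    outputs = model(**processor(images=image, return_tensors="pt"))

# PIL's (width, height) is reversed into the (height, width) the processor expects.
results = processor.post_process_object_detection(
    outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
print(results["scores"], results["labels"], results["boxes"])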

@@ -25,6 +25,7 @@ from transformers.testing_utils import (
require_read_token,
require_torch,
require_torch_accelerator,
run_test_using_subprocess,
slow,
torch_device,
)
@@ -96,36 +97,28 @@ class LlamaModelTest(CausalLMModelTest, unittest.TestCase):
@require_torch_accelerator
@require_read_token
class LlamaIntegrationTest(unittest.TestCase):
def setup(self):
cleanup(torch_device, gc_collect=True)
def tearDown(self):
# TODO (joao): automatic compilation, i.e. compilation when `cache_implementation="static"` is used, leaves
# some memory allocated in the cache, which means some object is not being released properly. This causes some
# unoptimal memory usage, e.g. after certain tests a 7B model in FP16 no longer fits in a 24GB GPU.
# Investigate the root cause.
cleanup(torch_device, gc_collect=False)
cleanup(torch_device, gc_collect=True)
@slow
@require_read_token
def test_llama_3_1_hard(self):
"""
An integration test for Llama 3.1. It tests against a long output to ensure that the subtle numerical differences
from Llama 3.1's RoPE can be detected
"""
# diff on `EXPECTED_TEXT`:
# 2024-08-26: updating from torch 2.3.1 to 2.4.0 slightly changes the results.
expected_base_text = (
"Tell me about the french revolution. The french revolution was a period of radical political and social "
"upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked "
"by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the "
"First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative "
"assembly that had not met since 1614. The Third Estate, which represented the common people, "
"demanded greater representation and eventually broke away to form the National Assembly. This marked "
"the beginning of the end of the absolute monarchy and the rise of the middle class.\n"
)
expected_texts = Expectations(
{
("rocm", (9, 5)): expected_base_text.replace("political and social", "social and political"),
("cuda", None): expected_base_text,
("rocm", (9, 5)): 'Tell me about the french revolution. The french revolution was a period of radical social and political upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative assembly that had not met since 1614. The Third Estate, which represented the common people, demanded greater representation and eventually broke away to form the National Assembly. This marked the beginning of the end of the absolute monarchy and the rise of the middle class.\n',
("cuda", None): 'Tell me about the french revolution. The french revolution was a period of radical political and social upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative assembly that had not met since 1614. The Third Estate, which represented the common people, demanded greater representation and eventually broke away to form the National Assembly. The National Assembly adopted the Declaration of the Rights of Man and of the Citizen, which enshr',
}
) # fmt: skip
EXPECTED_TEXT = expected_texts.get_expectation()
@@ -142,7 +135,6 @@ class LlamaIntegrationTest(unittest.TestCase):
self.assertEqual(generated_text, EXPECTED_TEXT)
@slow
@require_read_token
def test_model_7b_logits_bf16(self):
input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
@@ -191,7 +183,6 @@ class LlamaIntegrationTest(unittest.TestCase):
)
@slow
@require_read_token
def test_model_7b_logits(self):
input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
@@ -240,6 +231,9 @@ class LlamaIntegrationTest(unittest.TestCase):
)
)
# TODO: check why we have the following strange situation.
# without running in subprocess, this test causes subsequent tests failing with `RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0!`
@run_test_using_subprocess
@slow
def test_model_7b_dola_generation(self):
# ground truth text generated with dola_layers="low", repetition_penalty=1.2
@@ -265,7 +259,6 @@ class LlamaIntegrationTest(unittest.TestCase):
@slow
@require_torch_accelerator
@require_read_token
def test_compile_static_cache(self):
# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
# work as intended. See https://github.com/pytorch/pytorch/issues/121943
@@ -306,7 +299,6 @@ class LlamaIntegrationTest(unittest.TestCase):
self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text)
@slow
@require_read_token
def test_export_static_cache(self):
if version.parse(torch.__version__) < version.parse("2.4.0"):
self.skipTest(reason="This test requires torch >= 2.4 to run.")
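The `Expectations` helper used in `test_llama_3_1_hard` maps (device type, compute capability) keys to per-hardware reference outputs and selects the best match for the current device via `get_expectation()`. A schematic of the pattern as it appears at the call sites in this diff (the strings are placeholders; the exact matching rules live in `transformers.testing_utils`):

from transformers.testing_utils import Expectations

expected_texts = Expectations(
    {
        ("rocm", (9, 5)): "reference text captured on MI300-class hardware",  # placeholder
        ("cuda", None): "reference text captured on CUDA hardware",           # placeholder
    }
)
# Picks the entry whose key best matches the device the test is running on.
EXPECTED_TEXT = expected_texts.get_expectation()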

@@ -407,6 +407,8 @@ class LlamaIntegrationTest(unittest.TestCase):
self.tokenizer.add_eos_token = False
self.rust_tokenizer.add_eos_token = False
# See internal discussion: https://huggingface.slack.com/archives/C01NE71C4F7/p1750680376085749?thread_ts=1750676268.233309&cid=C01NE71C4F7
@unittest.skip("failing, won't fix")
@slow
def test_conversion(self):
# This is excruciatingly slow since it has to recreate the entire merge

@@ -27,6 +27,7 @@ from transformers import (
AutoTokenizer,
BitsAndBytesConfig,
pipeline,
set_seed,
)
from transformers.models.opt.modeling_opt import OPTAttention
from transformers.testing_utils import (
@@ -111,6 +112,8 @@ class Base4bitTest(unittest.TestCase):
EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University")
EXPECTED_OUTPUTS.add("Hello my name is John and I am 25 years old.")
EXPECTED_OUTPUTS.add("Hello my name is John and I am a student at the University of")
# Expected values on Intel XPU and NV A100
EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
MAX_NEW_TOKENS = 10
def setUp(self):
@@ -513,6 +516,8 @@ class Pipeline4BitTest(Base4bitTest):
max_new_tokens=self.MAX_NEW_TOKENS,
)
# Avoid sampling different outputs
set_seed(42)
# Real second forward pass
pipeline_output = self.pipe(self.input_text)
self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)
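The added `set_seed(42)` pins the RNG state right before the second pipeline call, so a sampled generation cannot drift between runs and land outside `EXPECTED_OUTPUTS`. The helper seeds Python, NumPy, and torch in one call; a minimal sketch of the pattern (the checkpoint name is illustrative):

from transformers import pipeline, set_seed

generator = pipeline("text-generation", model="facebook/opt-350m")  # illustrative checkpoint

set_seed(42)
first = generator("Hello my name is", max_new_tokens=10, do_sample=True)
set_seed(42)
second = generator("Hello my name is", max_new_tokens=10, do_sample=True)
assert first == second  # identical seeds give identical samples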

@@ -27,6 +27,7 @@ from transformers import (
AutoTokenizer,
BitsAndBytesConfig,
pipeline,
set_seed,
)
from transformers.models.opt.modeling_opt import OPTAttention
from transformers.testing_utils import (
@@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase):
MAX_NEW_TOKENS = 10
# Expected values with offload
EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in")
# Expected values on Intel XPU and NV A100
EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
def setUp(self):
# Models and tokenizer
@@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test):
max_new_tokens=self.MAX_NEW_TOKENS,
)
# Avoid sampling different outputs
set_seed(42)
# Real second forward pass
pipeline_output = self.pipe(self.input_text)
self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)

utils/get_pr_run_slow_jobs.py (new file, 133 lines)

@@ -0,0 +1,133 @@
import argparse
import json
import re
import string

MAX_NUM_JOBS_TO_SUGGEST = 16


def get_jobs_to_run():
    # The file `pr_files.txt` contains the information about the files changed in a pull request, and it is prepared
    # by the caller (using the GitHub API).
    # We can also use the following API to get the information if we don't have it before calling this script.
    # url = f"https://api.github.com/repos/huggingface/transformers/pulls/PULL_NUMBER/files?ref={pr_sha}"
    with open("pr_files.txt") as fp:
        pr_files = json.load(fp)
        pr_files = [{k: v for k, v in item.items() if k in ["filename", "status"]} for item in pr_files]
        pr_files = [item["filename"] for item in pr_files if item["status"] in ["added", "modified"]]

    # models or quantizers
    re_1 = re.compile(r"src/transformers/(models/.*)/modeling_.*\.py")
    re_2 = re.compile(r"src/transformers/(quantizers/quantizer_.*)\.py")
    # tests for models or quantizers
    re_3 = re.compile(r"tests/(models/.*)/test_.*\.py")
    re_4 = re.compile(r"tests/(quantization/.*)/test_.*\.py")
    # files in a model directory but not necessarily a modeling file
    re_5 = re.compile(r"src/transformers/(models/.*)/.*\.py")

    regexes = [re_1, re_2, re_3, re_4, re_5]

    jobs_to_run = []
    for pr_file in pr_files:
        for regex in regexes:
            matched = regex.findall(pr_file)
            if len(matched) > 0:
                item = matched[0]
                item = item.replace("quantizers/quantizer_", "quantization/")
                # TODO: for files in `quantizers`, the processed item above may not exist. Try using a fuzzy matching.
                if item in repo_content:
                    jobs_to_run.append(item)
                break

    jobs_to_run = sorted(set(jobs_to_run))

    return jobs_to_run


def parse_message(message: str) -> str:
    """
    Parses a GitHub pull request's comment to find the models specified in it to run slow CI.

    Args:
        message (`str`): The body of a GitHub pull request's comment.

    Returns:
        `str`: The substring in `message` after `run-slow`, `run_slow` or `run slow`. If no such prefix is found, the
        empty string is returned.
    """
    if message is None:
        return ""

    message = message.strip().lower()

    # run-slow: model_1, model_2, quantization_1, quantization_2
    if not message.startswith(("run-slow", "run_slow", "run slow")):
        return ""
    message = message[len("run slow") :]
    # remove leading `:`
    while message.strip().startswith(":"):
        message = message.strip()[1:]

    return message


def get_jobs(message: str):
    models = parse_message(message)
    return models.replace(",", " ").split()


def check_name(model_name: str):
    allowed = string.ascii_letters + string.digits + "_"
    return not (model_name.startswith("_") or model_name.endswith("_")) and all(c in allowed for c in model_name)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--message", type=str, default="", help="The content of a comment.")
    parser.add_argument("--quantization", action="store_true", help="If we collect quantization tests")
    args = parser.parse_args()

    # The files are prepared by the caller (using the GitHub API).
    # We can also use the following API to get the information if we don't have it before calling this script.
    # url = f"https://api.github.com/repos/OWNER/REPO/contents/PATH?ref={pr_sha}"
    # (We avoid checking out the repository with `actions/checkout`, both to reduce the run time and, more
    # importantly, to minimize the potential security issues.)
    repo_content = []
    for filename in ["tests_dir.txt", "tests_models_dir.txt", "tests_quantization_dir.txt"]:
        with open(filename) as fp:
            data = json.load(fp)
            data = [item["path"][len("tests/") :] for item in data if item["type"] == "dir"]
            repo_content.extend(data)

    # These don't have the prefix `models/` or `quantization/`, so we need to add it.
    if args.message:
        specified_jobs = get_jobs(args.message)
        specified_jobs = [job for job in specified_jobs if check_name(job)]

        # Add the prefix (`models/` or `quantization/`)
        jobs_to_run = []
        for job in specified_jobs:
            if not args.quantization:
                if f"models/{job}" in repo_content:
                    jobs_to_run.append(f"models/{job}")
                elif job in repo_content and job != "quantization":
                    jobs_to_run.append(job)
            elif f"quantization/{job}" in repo_content:
                jobs_to_run.append(f"quantization/{job}")

        print(sorted(set(jobs_to_run)))
    else:
        # Compute (from the added/modified files) the directories under `tests/`, `tests/models/` and
        # `tests/quantization` to run tests against.
        # These already carry the prefix `models/` or `quantization/`, so we don't need to add it.
        jobs_to_run = get_jobs_to_run()
        jobs_to_run = [x.replace("models/", "").replace("quantization/", "") for x in jobs_to_run]
        jobs_to_run = [job for job in jobs_to_run if check_name(job)]

        if len(jobs_to_run) > MAX_NUM_JOBS_TO_SUGGEST:
            jobs_to_run = jobs_to_run[:MAX_NUM_JOBS_TO_SUGGEST]

        suggestion = f"{', '.join(jobs_to_run)}"

        print(suggestion)
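A few concrete behaviors of this script, useful when reading the workflow that calls it. `parse_message` strips the accepted `run-slow` prefixes (and a leading colon), `get_jobs` splits what remains on commas and whitespace, and `check_name` rejects anything outside letters, digits, and inner underscores, so a comment cannot smuggle path tricks into a job name. Illustrative calls, assuming the functions above are in scope:

assert get_jobs("run-slow: llama, bert") == ["llama", "bert"]
assert get_jobs("RUN_SLOW: gemma3n") == ["gemma3n"]       # matching is case-insensitive
assert get_jobs("please run everything") == []            # no recognized prefix: ignored

assert check_name("conditional_detr")
assert not check_name("_private")
assert not check_name("bad-name")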