Comment bot CI for other jobs (generation / quantization) (#35341)

* quantization CI on PRs * fix * fix * add 2 members --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-02 03:01:07 +06:00 · 2025-02-04 14:42:51 +01:00 · 2025-02-04 14:42:51 +01:00 · f19bfa50e7
commit f19bfa50e7
parent a93b80588b
2 changed files with 148 additions and 34 deletions
--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@ -30,7 +30,7 @@ jobs:
    runs-on: ubuntu-22.04
    name: Get PR number
    # For security: only allow team members to run
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
+    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
@ -98,6 +98,7 @@ jobs:
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    outputs:
      models: ${{ steps.models_to_run.outputs.models }}
      quantizations: ${{ steps.models_to_run.outputs.quantizations }}
    steps:
      - uses: actions/checkout@v4
        with:
@ -121,6 +122,8 @@ jobs:
          python -m pip install GitPython
          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
          echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
          echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
      - name: Show models to test
        id: models_to_run
@ -128,10 +131,12 @@ jobs:
          echo "${{ env.models }}"
          echo "models=${{ env.models }}" >> $GITHUB_ENV
          echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
          echo "${{ env.quantizations }}"
          echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
  reply_to_comment:
    name: Reply to the comment
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]'  || needs.get-tests.outputs.quantizations != '[]' }}
    needs: [get-pr-number, get-tests]
    permissions:
      pull-requests: write
@ -141,17 +146,18 @@ jobs:
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          MODELS: ${{ needs.get-tests.outputs.models }}
          BODY: "This comment contains run-slow, running the specified jobs:\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.MODELS }} ..."
+            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
  create_run:
    name: Create run
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
    needs: [get-sha, get-tests, reply_to_comment]
    permissions:
      statuses: write
@ -173,20 +179,20 @@ jobs:
            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
  run_models_gpu:
-      name: Run all tests for the model
+    name: Run all tests for the model
-      if: ${{ needs.get-tests.outputs.models != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]' }}
-      needs: [get-pr-number, get-sha, get-tests, create_run]
+    needs: [get-pr-number, get-sha, get-tests, create_run]
-      strategy:
+    strategy:
-        fail-fast: false
+      fail-fast: false
-        matrix:
+      matrix:
-          folders: ${{ fromJson(needs.get-tests.outputs.models) }}
+        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
-          machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
-      runs-on:
+    runs-on:
-         group: '${{ matrix.machine_type }}'
+       group: '${{ matrix.machine_type }}'
-      container:
+    container:
-        image: huggingface/transformers-all-latest-gpu
+      image: huggingface/transformers-all-latest-gpu
-        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-      steps:
+    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
@ -206,20 +212,19 @@ jobs:
      - name: Checkout to PR merge commit
        working-directory: /transformers
        run: |
-            git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-            git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-            git log -1 --format=%H
+          git log -1 --format=%H
      - name: Verify merge commit SHA
        env:
          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
        working-directory: /transformers
        run: |
-            PR_MERGE_SHA=$(git log -1 --format=%H)
+          PR_MERGE_SHA=$(git log -1 --format=%H)
-            if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
+          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-              echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
+            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-              exit -1;
+            exit -1;
            fi
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
@ -279,9 +284,105 @@ jobs:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
  # Runs the quantization test folders requested in the PR comment.
  # One matrix entry is created per (folder, machine_type) pair; `folders` comes
  # from the `quantizations` output of the `get-tests` job, and the job is
  # skipped entirely when that output is the empty JSON list.
  run_quantization_torch_gpu:
    name: Run all tests for a quantization
    if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
    needs: [get-pr-number, get-sha, get-tests, create_run]
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-quantization-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      # Export `matrix_folders`: the folder name with `quantization/` replaced by
      # `quantization_`. It is used below in the artifact name (presumably because
      # artifact names may not contain `/` - confirm against upload-artifact docs).
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
      - name: Checkout to PR merge commit
        working-directory: /transformers
        run: |
          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
          git log -1 --format=%H
      # Abort unless the checked-out commit is exactly the SHA recorded by `get-sha`.
      - name: Verify merge commit SHA
        env:
          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
        working-directory: /transformers
        run: |
          PR_MERGE_SHA=$(git log -1 --format=%H)
          # Both operands are quoted so that an empty SHA compares as "different"
          # (fail closed) instead of making `[` error out, which `if` would treat
          # as false and silently skip the abort.
          if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
            exit -1;
          fi
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      # Map the runner group name to the short label used in report/artifact names.
      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV
      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze
      - name: Run quantization tests on GPU
        working-directory: /transformers
        run: |
          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
      # The directory name must match the `--make-reports` id above and the upload
      # `path` below (`..._run_quantization_torch_gpu_...`); otherwise the placeholder
      # file lands in a directory that is never uploaded.
      - name: Make sure report directory exists
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports"
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
  update_run_status:
    name: Update Check Run Status
-    needs: [get-sha, create_run, run_models_gpu]
+    needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
    permissions:
      statuses: write
    if: ${{ always() && needs.create_run.result == 'success' }}
@ -289,16 +390,17 @@ jobs:
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
    steps:
      - name: Get `run_models_gpu` job status
        run: |
          echo "${{ needs.run_models_gpu.result }}"
-          if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then
+          echo "${{ needs.run_quantization_torch_gpu.result }}"
-            echo "STATUS=failure" >> $GITHUB_ENV
+          echo $STATUS_OK
-          elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then
+          if [ "$STATUS_OK" = "true" ]; then
            echo "STATUS=success" >> $GITHUB_ENV
          else
-            echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV
+            echo "STATUS=failure" >> $GITHUB_ENV
          fi
      - name: Update PR commit statuses
--- a/utils/pr_slow_ci_models.py
+++ b/utils/pr_slow_ci_models.py
@ -27,6 +27,7 @@ python utils/pr_slow_ci_models.py
 """
 import argparse
 import os.path
 import re
 import string
 from pathlib import Path
@ -142,6 +143,7 @@ def check_model_names(model_name: str):
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--message", type=str, default="", help="The content of a comment.")
    parser.add_argument("--quantization", action="store_true", help="If we collect quantization tests")
    args = parser.parse_args()
    new_model = get_new_model()
@ -149,6 +151,16 @@ if __name__ == "__main__":
    models = ([] if new_model == "" else [new_model]) + specified_models
    # a guard for strange model names
    models = [model for model in models if check_model_names(model)]
-    # Add "models/"
+
-    models = [f"models/{model}" for model in models]
+    # Add prefix
-    print(sorted(set(models)))
+    final_list = []
    for model in models:
        if not args.quantization:
            if os.path.isdir(f"tests/models/{model}"):
                final_list.append(f"models/{model}")
            elif os.path.isdir(f"tests/{model}") and model != "quantization":
                final_list.append(model)
        elif os.path.isdir(f"tests/quantization/{model}"):
            final_list.append(f"quantization/{model}")
    print(sorted(set(final_list)))