diff --git a/.github/workflows/self-comment-ci.yml b/.github/workflows/self-comment-ci.yml
index 3f2b637e047..6287bb7154f 100644
--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@@ -30,7 +30,7 @@ jobs:
     runs-on: ubuntu-22.04
     name: Get PR number
     # For security: only allow team members to run
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
+    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
     outputs:
       PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
     steps:
@@ -98,6 +98,7 @@ jobs:
     if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
     outputs:
       models: ${{ steps.models_to_run.outputs.models }}
+      quantizations: ${{ steps.models_to_run.outputs.quantizations }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -121,6 +122,8 @@ jobs:
           python -m pip install GitPython
           python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
           echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
+          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
+          echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
 
       - name: Show models to test
         id: models_to_run
@@ -128,10 +131,12 @@ jobs:
           echo "${{ env.models }}"
           echo "models=${{ env.models }}" >> $GITHUB_ENV
           echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
+          echo "${{ env.quantizations }}"
+          echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
 
   reply_to_comment:
     name: Reply to the comment
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
     needs: [get-pr-number, get-tests]
     permissions:
       pull-requests: write
@@ -141,17 +146,19 @@ jobs:
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           MODELS: ${{ needs.get-tests.outputs.models }}
+          # Only the variable part of the reply; the fixed sentence is added once in the `gh api` call below.
+          BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
         run: |
           gh api \
             --method POST \
             -H "Accept: application/vnd.github+json" \
             -H "X-GitHub-Api-Version: 2022-11-28" \
             repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.MODELS }} ..."
+            -f "body=This comment contains run-slow, running the specified jobs: $BODY ..."
 
   create_run:
     name: Create run
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
     needs: [get-sha, get-tests, reply_to_comment]
     permissions:
       statuses: write
@@ -173,20 +180,20 @@ jobs:
             -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
 
   run_models_gpu:
-    name: Run all tests for the model
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
-        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
+      name: Run all tests for the model
+      if: ${{ needs.get-tests.outputs.models != '[]' }}
+      needs: [get-pr-number, get-sha, get-tests, create_run]
+      strategy:
+        fail-fast: false
+        matrix:
+          folders: ${{ fromJson(needs.get-tests.outputs.models) }}
+          machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+      runs-on:
+        group: '${{ matrix.machine_type }}'
+      container:
+        image: huggingface/transformers-all-latest-gpu
+        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      steps:
       - name: Echo input and matrix info
         shell: bash
         run: |
@@ -206,20 +213,20 @@ jobs:
       - name: Checkout to PR merge commit
         working-directory: /transformers
         run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
+            git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+            git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+            git log -1 --format=%H
 
       - name: Verify merge commit SHA
         env:
           VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
         working-directory: /transformers
         run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
+            PR_MERGE_SHA=$(git log -1 --format=%H)
+            if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
+              echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
+              exit 1;
+            fi
 
       - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
         working-directory: /transformers
@@ -279,9 +286,109 @@ jobs:
           name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
           path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
 
+  # Quantization counterpart of `run_models_gpu`: runs each test folder listed in the `quantizations` output of `get-tests`.
+  run_quantization_torch_gpu:
+    name: Run all tests for a quantization
+    if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
+    needs: [get-pr-number, get-sha, get-tests, create_run]
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+    runs-on:
+      group: '${{ matrix.machine_type }}'
+    container:
+      image: huggingface/transformers-quantization-latest-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          # The artifact name below uses `matrix_folders`: same value with "quantization/" flattened to "quantization_".
+          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: Checkout to PR merge commit
+        working-directory: /transformers
+        run: |
+          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+          git log -1 --format=%H
+
+      - name: Verify merge commit SHA
+        env:
+          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
+        working-directory: /transformers
+        run: |
+          PR_MERGE_SHA=$(git log -1 --format=%H)
+          if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
+            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
+            exit 1;
+          fi
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Set `machine_type` for report and artifact names
+        working-directory: /transformers
+        shell: bash
+        run: |
+          echo "${{ matrix.machine_type }}"
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+            machine_type=single-gpu
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+            machine_type=multi-gpu
+          else
+            machine_type=${{ matrix.machine_type }}
+          fi
+          echo "$machine_type"
+          echo "machine_type=$machine_type" >> $GITHUB_ENV
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run quantization tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+
+      - name: Make sure report directory exists
+        shell: bash
+        run: |
+          # Create the directory that the upload step points at, so there is always something to upload.
+          mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports"
+
+      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
+
   update_run_status:
     name: Update Check Run Status
-    needs: [get-sha, create_run, run_models_gpu]
+    needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
     permissions:
       statuses: write
     if: ${{ always() && needs.create_run.result == 'success' }}
@@ -289,16 +396,18 @@ jobs:
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+      # "true" only when both test jobs ended as "success" or "skipped"; any other result is reported as a failure below.
+      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
     steps:
       - name: Get `run_models_gpu` job status
         run: |
           echo "${{ needs.run_models_gpu.result }}"
-          if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then
-            echo "STATUS=failure" >> $GITHUB_ENV
-          elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then
+          echo "${{ needs.run_quantization_torch_gpu.result }}"
+          echo "$STATUS_OK"
+          if [ "$STATUS_OK" = "true" ]; then
             echo "STATUS=success" >> $GITHUB_ENV
           else
-            echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV
+            echo "STATUS=failure" >> $GITHUB_ENV
           fi
 
       - name: Update PR commit statuses
diff --git a/utils/pr_slow_ci_models.py b/utils/pr_slow_ci_models.py
index c6a24c0f219..312bd078e63 100644
--- a/utils/pr_slow_ci_models.py
+++ b/utils/pr_slow_ci_models.py
@@ -27,6 +27,7 @@ python utils/pr_slow_ci_models.py
 """
 
 import argparse
+import os.path
 import re
 import string
 from pathlib import Path
@@ -142,6 +143,7 @@ def check_model_names(model_name: str):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--message", type=str, default="", help="The content of a comment.")
+    parser.add_argument("--quantization", action="store_true", help="If we collect quantization tests")
     args = parser.parse_args()
 
     new_model = get_new_model()
@@ -149,6 +151,16 @@ if __name__ == "__main__":
     models = ([] if new_model == "" else [new_model]) + specified_models
     # a guard for strange model names
     models = [model for model in models if check_model_names(model)]
-    # Add "models/"
-    models = [f"models/{model}" for model in models]
-    print(sorted(set(models)))
+
+    # Map each name to its folder under "tests/" (the workflow runs "tests/<entry>"); names with no matching folder are dropped.
+    final_list = []
+    for model in models:
+        if not args.quantization:
+            if os.path.isdir(f"tests/models/{model}"):
+                final_list.append(f"models/{model}")
+            elif os.path.isdir(f"tests/{model}") and model != "quantization":
+                final_list.append(model)
+        elif os.path.isdir(f"tests/quantization/{model}"):
+            final_list.append(f"quantization/{model}")
+
+    print(sorted(set(final_list)))