# Slow CI triggered from a pull-request comment.
#
# A comment starting with `run-slow`, `run slow` or `run_slow`, posted on an open PR by one of the
# allow-listed users, selects model / quantization test folders, runs them on GPU runners and
# reports the outcome as a commit status (context `pytest/custom-tests`) on the PR head commit.
name: PR comment GitHub CI

on:
  issue_comment:
    types:
      - created
    # NOTE(review): branch filters are presumably ignored for `issue_comment` events - confirm
    # against the workflow syntax documentation before relying on this.
    branches-ignore:
      - main

# Runs are grouped by workflow, issue number and whether the comment is a `run-slow` request;
# a newer run in the same group cancels the one in progress.
concurrency:
  group: >-
    ${{ github.workflow }}-${{ github.event.issue.number }}-${{
    startsWith(github.event.comment.body, 'run-slow') ||
    startsWith(github.event.comment.body, 'run slow') ||
    startsWith(github.event.comment.body, 'run_slow') }}
  cancel-in-progress: true

# Read-only by default; jobs that post comments or statuses widen this locally.
permissions: read-all

env:
  HF_HOME: /mnt/cache
  # Values are quoted so they reach the environment as the exact strings written here.
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  CUDA_VISIBLE_DEVICES: "0,1"

jobs:
  # Gate job: checks the commenter and the comment prefix, then exposes the PR number
  # (empty when the comment was not made on a pull request).
  get-pr-number:
    runs-on: ubuntu-22.04
    name: Get PR number
    # For security: only allow team members to run
    if: >-
      ${{ github.event.issue.state == 'open' &&
      contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante",
      "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber",
      "manueldeprada", "vasqu"]'), github.actor) &&
      (startsWith(github.event.comment.body, 'run-slow') ||
      startsWith(github.event.comment.body, 'run slow') ||
      startsWith(github.event.comment.body, 'run_slow')) }}
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
      - name: Get PR number
        shell: bash
        run: |
          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.issue.number }}" >> "$GITHUB_ENV"
          else
            echo "PR_NUMBER=" >> "$GITHUB_ENV"
          fi

      - name: Check PR number
        shell: bash
        run: |
          echo "${{ env.PR_NUMBER }}"

      - name: Set PR number
        id: set_pr_number
        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"

  # Records the PR head and merge commit SHAs and refuses to continue when the merge commit
  # is not strictly older than the triggering comment.
  get-sha:
    runs-on: ubuntu-22.04
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    outputs:
      PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
      PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"
          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"

      - name: Get SHA (and verify timestamps against the issue comment date)
        id: get_sha
        env:
          PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
          COMMENT_DATE: ${{ github.event.comment.created_at }}
        run: |
          git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
          git checkout refs/remotes/pull/$PR_NUMBER/head
          echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
          echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
          git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
          git checkout refs/remotes/pull/$PR_NUMBER/merge
          echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
          echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
          PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
          echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
          COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
          echo "COMMENT_DATE: $COMMENT_DATE"
          echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
          # Written as a negated `-gt` so that a missing or non-numeric timestamp (which makes `[`
          # itself fail) also aborts instead of silently skipping the check.
          if ! [ "$COMMENT_TIMESTAMP" -gt "$PR_MERGE_COMMIT_TIMESTAMP" ]; then
            echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!"
            exit 1
          fi

  # use a python script to handle this complex logic
  # case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
  # case 2: `run-slow model_1, model_2`
  get-tests:
    runs-on: ubuntu-22.04
    needs: [get-pr-number, get-sha]
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    outputs:
      models: ${{ steps.models_to_run.outputs.models }}
      quantizations: ${{ steps.models_to_run.outputs.quantizations }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"
          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"

      - name: Verify merge commit SHA
        env:
          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
        run: |
          PR_MERGE_SHA=$(git log -1 --format=%H)
          # Operands are quoted so that an empty value is a mismatch rather than a `[` syntax error.
          if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"
            exit 1
          fi

      - name: Get models to test
        env:
          # The comment body is passed through the environment, not interpolated into the script.
          PR_COMMENT: ${{ github.event.comment.body }}
        run: |
          python -m pip install GitPython
          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
          echo "models=$(tail -n 1 output.txt)" >> "$GITHUB_ENV"
          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
          echo "quantizations=$(tail -n 1 output2.txt)" >> "$GITHUB_ENV"

      - name: Show models to test
        id: models_to_run
        run: |
          echo "${{ env.models }}"
          echo "models=${{ env.models }}" >> "$GITHUB_ENV"
          echo "models=${{ env.models }}" >> "$GITHUB_OUTPUT"
          echo "${{ env.quantizations }}"
          echo "quantizations=${{ env.quantizations }}" >> "$GITHUB_OUTPUT"

  # Posts a comment on the PR listing the selected models and quantizations.
  reply_to_comment:
    name: Reply to the comment
    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
    needs: [get-pr-number, get-tests]
    permissions:
      pull-requests: write
    runs-on: ubuntu-22.04
    steps:
      - name: Reply to the comment
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          MODELS: ${{ needs.get-tests.outputs.models }}
          BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."

  # Marks the PR head commit as `pending` for the `pytest/custom-tests` context.
  create_run:
    name: Create run
    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
    needs: [get-sha, get-tests, reply_to_comment]
    permissions:
      statuses: write
    runs-on: ubuntu-22.04
    steps:
      - name: Create Run
        id: create_run
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"

  # Runs the tests of every selected model folder, once per runner group in the matrix.
  run_models_gpu:
    name: Run all tests for the model
    if: ${{ needs.get-tests.outputs.models != '[]' }}
    needs: [get-pr-number, get-sha, get-tests, create_run]
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ matrix.folders }}"

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: Checkout to PR merge commit
        working-directory: /transformers
        run: |
          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
          git log -1 --format=%H

      - name: Verify merge commit SHA
        env:
          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
        working-directory: /transformers
        run: |
          PR_MERGE_SHA=$(git log -1 --format=%H)
          # Operands are quoted so that an empty value is a mismatch rather than a `[` syntax error.
          if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"
            exit 1
          fi

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> "$GITHUB_ENV"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all tests on GPU
        working-directory: /transformers
        run: |
          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
          echo $CUDA_VISIBLE_DEVICES
          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      - name: Make sure report directory exists
        # Runs even after a failure so the upload step below always has a directory to pick up.
        if: ${{ always() }}
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

  # Runs the tests of every selected quantization folder, once per runner group in the matrix.
  run_quantization_torch_gpu:
    name: Run all tests for a quantization
    if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
    needs: [get-pr-number, get-sha, get-tests, create_run]
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-quantization-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # Store the folder with `quantization/` replaced by `quantization_` in `matrix_folders`;
        # it is used for the artifact name in the upload step.
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: Checkout to PR merge commit
        working-directory: /transformers
        run: |
          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
          git log -1 --format=%H

      - name: Verify merge commit SHA
        env:
          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
        working-directory: /transformers
        run: |
          PR_MERGE_SHA=$(git log -1 --format=%H)
          # Operands are quoted so that an empty value is a mismatch rather than a `[` syntax error.
          if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"
            exit 1
          fi

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
          if [ "${{ matrix.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> "$GITHUB_ENV"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run quantization tests on GPU
        working-directory: /transformers
        run: |
          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      - name: Make sure report directory exists
        # Runs even after a failure so the upload step below always has a directory to pick up.
        # The directory name must match the `--make-reports` value and the upload `path`.
        if: ${{ always() }}
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports"

      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports

  # Replaces the pending commit status created by `create_run` with the final result.
  update_run_status:
    name: Update Check Run Status
    needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
    permissions:
      statuses: write
    # Runs whatever the test jobs' outcome, provided the pending status was created.
    if: ${{ always() && needs.create_run.result == 'success' }}
    runs-on: ubuntu-22.04
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      # "true" only when each test job either succeeded or was skipped.
      STATUS_OK: >-
        ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) &&
        contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
    steps:
      - name: Get `run_models_gpu` job status
        run: |
          echo "${{ needs.run_models_gpu.result }}"
          echo "${{ needs.run_quantization_torch_gpu.result }}"
          echo $STATUS_OK
          if [ "$STATUS_OK" = "true" ]; then
            echo "STATUS=success" >> "$GITHUB_ENV"
          else
            echo "STATUS=failure" >> "$GITHUB_ENV"
          fi

      - name: Update PR commit statuses
        run: |
          echo "${{ needs.run_models_gpu.result }}"
          echo "${{ env.STATUS }}"
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"