Comment bot CI for other jobs (generation / quantization) (#35341)

* quantization CI on PRs

* fix

* fix

* add 2 members

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar 2025-02-04 14:42:51 +01:00 committed by GitHub
parent a93b80588b
commit f19bfa50e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 148 additions and 34 deletions

View File

@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
name: Get PR number name: Get PR number
# For security: only allow team members to run # For security: only allow team members to run
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }} if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
outputs: outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }} PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps: steps:
@ -98,6 +98,7 @@ jobs:
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
outputs: outputs:
models: ${{ steps.models_to_run.outputs.models }} models: ${{ steps.models_to_run.outputs.models }}
quantizations: ${{ steps.models_to_run.outputs.quantizations }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@ -121,6 +122,8 @@ jobs:
python -m pip install GitPython python -m pip install GitPython
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
- name: Show models to test - name: Show models to test
id: models_to_run id: models_to_run
@ -128,10 +131,12 @@ jobs:
echo "${{ env.models }}" echo "${{ env.models }}"
echo "models=${{ env.models }}" >> $GITHUB_ENV echo "models=${{ env.models }}" >> $GITHUB_ENV
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
echo "${{ env.quantizations }}"
echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
reply_to_comment: reply_to_comment:
name: Reply to the comment name: Reply to the comment
if: ${{ needs.get-tests.outputs.models != '[]' }} if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-pr-number, get-tests] needs: [get-pr-number, get-tests]
permissions: permissions:
pull-requests: write pull-requests: write
@ -141,17 +146,18 @@ jobs:
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
MODELS: ${{ needs.get-tests.outputs.models }} MODELS: ${{ needs.get-tests.outputs.models }}
BODY: "This comment contains run-slow, running the specified jobs:\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
run: | run: |
gh api \ gh api \
--method POST \ --method POST \
-H "Accept: application/vnd.github+json" \ -H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \ -H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \ repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-f "body=This comment contains run-slow, running the specified jobs: ${{ env.MODELS }} ..." -f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
create_run: create_run:
name: Create run name: Create run
if: ${{ needs.get-tests.outputs.models != '[]' }} if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-sha, get-tests, reply_to_comment] needs: [get-sha, get-tests, reply_to_comment]
permissions: permissions:
statuses: write statuses: write
@ -173,20 +179,20 @@ jobs:
-f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests" -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
run_models_gpu: run_models_gpu:
name: Run all tests for the model name: Run all tests for the model
if: ${{ needs.get-tests.outputs.models != '[]' }} if: ${{ needs.get-tests.outputs.models != '[]' }}
needs: [get-pr-number, get-sha, get-tests, create_run] needs: [get-pr-number, get-sha, get-tests, create_run]
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
folders: ${{ fromJson(needs.get-tests.outputs.models) }} folders: ${{ fromJson(needs.get-tests.outputs.models) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on: runs-on:
group: '${{ matrix.machine_type }}' group: '${{ matrix.machine_type }}'
container: container:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: Echo input and matrix info - name: Echo input and matrix info
shell: bash shell: bash
run: | run: |
@ -206,20 +212,19 @@ jobs:
- name: Checkout to PR merge commit - name: Checkout to PR merge commit
working-directory: /transformers working-directory: /transformers
run: | run: |
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git log -1 --format=%H git log -1 --format=%H
- name: Verify merge commit SHA - name: Verify merge commit SHA
env: env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }} VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
working-directory: /transformers working-directory: /transformers
run: | run: |
PR_MERGE_SHA=$(git log -1 --format=%H) PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"; echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1; exit -1;
fi
- name: Reinstall transformers in edit mode (remove the one installed during docker image build) - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers working-directory: /transformers
@ -279,9 +284,105 @@ jobs:
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
# Runs the quantization test folders selected by the `get-tests` job, once per
# (folder, machine type) pair, against the PR merge commit, then uploads the
# pytest reports as an artifact.
run_quantization_torch_gpu:
  name: Run all tests for a quantization
  # `get-tests` emits a JSON list; '[]' means no quantization folder was requested.
  if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
  needs: [get-pr-number, get-sha, get-tests, create_run]
  strategy:
    fail-fast: false
    matrix:
      folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
      machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
  runs-on:
    group: '${{ matrix.machine_type }}'
  container:
    image: huggingface/transformers-quantization-latest-gpu
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    # Export a slash-free variant of the folder name as `matrix_folders`; it is
    # used below in the artifact name (presumably because artifact names may
    # not contain `/` - confirm against upload-artifact's naming rules).
    - name: Echo folder ${{ matrix.folders }}
      shell: bash
      run: |
        echo "${{ matrix.folders }}"
        matrix_folders=${{ matrix.folders }}
        matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
        echo "$matrix_folders"
        echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

    - name: Checkout to PR merge commit
      working-directory: /transformers
      run: |
        git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
        git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
        git log -1 --format=%H

    # Abort unless the checked-out commit is exactly the SHA recorded by `get-sha`.
    # Both operands are quoted so that an empty/missing verified SHA compares as
    # "different" (and aborts) instead of making `[` error out, which would
    # cause the `if` to evaluate false and silently skip the check.
    - name: Verify merge commit SHA
      env:
        VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
      working-directory: /transformers
      run: |
        PR_MERGE_SHA=$(git log -1 --format=%H)
        if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then
          echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
          exit 1;
        fi

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    # Map the runner group name to the short label used in report/artifact names.
    - name: Set `machine_type` for report and artifact names
      working-directory: /transformers
      shell: bash
      run: |
        echo "${{ matrix.machine_type }}"
        if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
          machine_type=single-gpu
        elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
          machine_type=multi-gpu
        else
          machine_type=${{ matrix.machine_type }}
        fi
        echo "$machine_type"
        echo "machine_type=$machine_type" >> $GITHUB_ENV

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    - name: Run quantization tests on GPU
      working-directory: /transformers
      run: |
        python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

    # Must create the same `..._run_quantization_torch_gpu_...` directory that the
    # pytest step writes to and the upload step reads from; a differently named
    # directory would not guarantee the uploaded path exists.
    - name: Make sure report directory exists
      shell: bash
      run: |
        mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
        echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/hello.txt
        echo "${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports"

    # Upload even when earlier steps failed so the reports stay available.
    - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
        path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
update_run_status: update_run_status:
name: Update Check Run Status name: Update Check Run Status
needs: [get-sha, create_run, run_models_gpu] needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
permissions: permissions:
statuses: write statuses: write
if: ${{ always() && needs.create_run.result == 'success' }} if: ${{ always() && needs.create_run.result == 'success' }}
@ -289,16 +390,17 @@ jobs:
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
steps: steps:
- name: Get `run_models_gpu` job status - name: Get `run_models_gpu` job status
run: | run: |
echo "${{ needs.run_models_gpu.result }}" echo "${{ needs.run_models_gpu.result }}"
if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then echo "${{ needs.run_quantization_torch_gpu.result }}"
echo "STATUS=failure" >> $GITHUB_ENV echo $STATUS_OK
elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then if [ "$STATUS_OK" = "true" ]; then
echo "STATUS=success" >> $GITHUB_ENV echo "STATUS=success" >> $GITHUB_ENV
else else
echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV echo "STATUS=failure" >> $GITHUB_ENV
fi fi
- name: Update PR commit statuses - name: Update PR commit statuses

View File

@ -27,6 +27,7 @@ python utils/pr_slow_ci_models.py
""" """
import argparse import argparse
import os.path
import re import re
import string import string
from pathlib import Path from pathlib import Path
@ -142,6 +143,7 @@ def check_model_names(model_name: str):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--message", type=str, default="", help="The content of a comment.") parser.add_argument("--message", type=str, default="", help="The content of a comment.")
parser.add_argument("--quantization", action="store_true", help="If we collect quantization tests")
args = parser.parse_args() args = parser.parse_args()
new_model = get_new_model() new_model = get_new_model()
@ -149,6 +151,16 @@ if __name__ == "__main__":
models = ([] if new_model == "" else [new_model]) + specified_models models = ([] if new_model == "" else [new_model]) + specified_models
# a guard for strange model names # a guard for strange model names
models = [model for model in models if check_model_names(model)] models = [model for model in models if check_model_names(model)]
# Add "models/"
models = [f"models/{model}" for model in models] # Add prefix
print(sorted(set(models))) final_list = []
for model in models:
if not args.quantization:
if os.path.isdir(f"tests/models/{model}"):
final_list.append(f"models/{model}")
elif os.path.isdir(f"tests/{model}") and model != "quantization":
final_list.append(model)
elif os.path.isdir(f"tests/quantization/{model}"):
final_list.append(f"quantization/{model}")
print(sorted(set(final_list)))