Trigger GitHub CI with a comment on PR (#35211)

* fix * fix * comment * final * final * final --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-01 02:31:11 +06:00 · 2024-12-18 13:56:49 +01:00 · 2024-12-18 13:56:49 +01:00 · f1b7634fc8
commit f1b7634fc8
parent c7e48053aa
3 changed files with 288 additions and 177 deletions
--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@ -0,0 +1,253 @@
 name: PR comment GitHub CI
 on:
  issue_comment:
    types:
      - created
    branches-ignore:
      - main
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
  cancel-in-progress: true
 jobs:
  get-pr-number:
    runs-on: ubuntu-22.04
    name: Get PR number
    # For security: only allow team members to run
    if: contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez"]'), github.actor)
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
      - name: Get PR number
        shell: bash
        run: |
          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
          else
            echo "PR_NUMBER=" >> $GITHUB_ENV
          fi
      - name: Check PR number
        shell: bash
        run: |
          echo "${{ env.PR_NUMBER }}"
      - name: Set PR number
        id: set_pr_number
        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
  get-sha:
    runs-on: ubuntu-22.04
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    outputs:
      PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"
          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
      - name: Get SHA
        id: get_sha
        env:
          PR_NUMBER: ${{needs.get-pr-number.outputs.PR_NUMBER}}
        run: |
            git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
            git checkout refs/remotes/pull/$PR_NUMBER/head
            echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
            echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
  # use a python script to handle this complex logic
  # case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
  # case 2: `run-slow model_1, model_2`
  get-tests:
    runs-on: ubuntu-22.04
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    permissions: write-all
    outputs:
      models: ${{ steps.models_to_run.outputs.models }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: "0"
          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
      - name: Get models to test
        env:
          PR_COMMENT: ${{ github.event.comment.body }}
        run: |
          python -m pip install GitPython
          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
          echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
      - name: Show models to test
        id: models_to_run
        run: |
          echo "${{ env.models }}"
          echo "models=${{ env.models }}" >> $GITHUB_ENV
          echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
      - name: Reply to the comment
        if: ${{ env.models != '[]' }}
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.models }} ..."
  create_run:
    name: Create run
    if: ${{ needs.get-tests.outputs.models != '[]' }}
    needs: [get-sha, get-tests]
    permissions: write-all
    runs-on: ubuntu-22.04
    steps:
      - name: Create Run
        id: create_run
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
  run_models_gpu:
      name: Run all tests for the model
      if: ${{ needs.get-tests.outputs.models != '[]' }}
      needs: [get-pr-number, get-tests, create_run]
      strategy:
        fail-fast: false
        matrix:
          folders: ${{ fromJson(needs.get-tests.outputs.models) }}
          machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
      runs-on:
         group: '${{ matrix.machine_type }}'
      container:
        image: huggingface/transformers-all-latest-gpu
        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
      steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
      - name: Checkout to PR merge commit
        working-directory: /transformers
        run: |
            git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
            git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
            git log -1 --format=%H
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV
      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze
      - name: Run all tests on GPU
        working-directory: /transformers
        run: |
          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
          echo $CUDA_VISIBLE_DEVICES
          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
      - name: Make sure report directory exists
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
  update_run_status:
    name: Update Check Run Status
    needs: [get-sha, create_run, run_models_gpu]
    permissions: write-all
    if: ${{ always() && needs.create_run.result == 'success' }}
    runs-on: ubuntu-22.04
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
    steps:
      - name: Get `run_models_gpu` job status
        run: |
          echo "${{ needs.run_models_gpu.result }}"
          if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then
            echo "STATUS=failure" >> $GITHUB_ENV
          elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then
            echo "STATUS=success" >> $GITHUB_ENV
          else
            echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV
          fi
      - name: Update PR commit statuses
        run: |
          echo "${{ needs.run_models_gpu.result }}"
          echo "${{ env.STATUS }}"
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"
--- a/.github/workflows/self-pr-slow-ci.yml
+++ b/.github/workflows/self-pr-slow-ci.yml
@ -1,151 +0,0 @@
 name: PR slow CI
 on:
  pull_request:
    paths:
      - "src/transformers/models/*/modeling_*.py"
      - "tests/**/test_*.py"
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1
 jobs:
  find_models_to_run:
      runs-on: ubuntu-22.04
      name: Find models to run slow tests
      # Triggered only if the required label `run-slow` is added
      if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
      outputs:
        models: ${{ steps.models_to_run.outputs.models }}
      steps:
        - uses: actions/checkout@v4
          with:
            fetch-depth: "0"
            ref: ${{ github.event.pull_request.head.sha }}
        - name: Get commit message
          run: |
            echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV
        - name: Get models to run slow tests
          run: |
            echo "${{ env.commit_message }}"
            python -m pip install GitPython
            python utils/pr_slow_ci_models.py --commit_message "${{ env.commit_message }}" | tee output.txt
            echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
        - name: Models to run slow tests
          id: models_to_run
          run: |
            echo "${{ env.models }}"
            echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
  run_models_gpu:
      name: Run all tests for the model
      # Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run
      # (either a new model PR or via a commit message)
      if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
      needs: find_models_to_run
      strategy:
        fail-fast: false
        matrix:
          folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
          machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
      runs-on:
        group: '${{ matrix.machine_type }}'
      container:
        image: huggingface/transformers-all-latest-gpu
        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
      steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Set `machine_type` for report and artifact names
        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
          fi
          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV    
      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze
      - name: Run all tests on GPU
        working-directory: /transformers
        run: |
          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
          echo $CUDA_VISIBLE_DEVICES
          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
      - name: Make sure report directory exists
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
--- a/utils/pr_slow_ci_models.py
+++ b/utils/pr_slow_ci_models.py
@ -15,19 +15,20 @@
 """
 This script is used to get the models for which to run slow CI.
-A new model added in a pull request will be included, as well as models specified in a commit message with a prefix
+A new model added in a pull request will be included, as well as models specified in a GitHub pull request's comment
-`[run-slow]`, `[run_slow]` or `[run slow]`. For example, the commit message `[run_slow]bert, gpt2` will give `bert` and
+with a prefix `run-slow`, `run_slow` or `run slow`. For example, the commit message `run_slow: bert, gpt2` will give
-`gpt2`.
+`bert` and `gpt2`.
 Usage:
 ```bash
-python utils/pr_slow_ci_models.py.py
+python utils/pr_slow_ci_models.py
 ```
 """
 import argparse
 import re
 import string
 from pathlib import Path
 from typing import List
@ -89,7 +90,7 @@ def get_new_python_files() -> List[str]:
 def get_new_model():
    new_files = get_new_python_files()
-    reg = re.compile(r"src/transformers/(models/.*)/modeling_.*\.py")
+    reg = re.compile(r"src/transformers/models/(.*)/modeling_.*\.py")
    new_model = ""
    for x in new_files:
@ -101,45 +102,53 @@ def get_new_model():
    return new_model
-def parse_commit_message(commit_message: str) -> str:
+def parse_message(message: str) -> str:
    """
-    Parses the commit message to find the models specified in it to run slow CI.
+    Parses a GitHub pull request's comment to find the models specified in it to run slow CI.
    Args:
-        commit_message (`str`): The commit message of the current commit.
+        message (`str`): The body of a GitHub pull request's comment.
    Returns:
-        `str`: The substring in `commit_message` after `[run-slow]`, [run_slow]` or [run slow]`. If no such prefix is
+        `str`: The substring in `message` after `run-slow`, run_slow` or run slow`. If no such prefix is found, the
-         found, the empty string is returned.
+        empty string is returned.
    """
-    if commit_message is None:
+    if message is None:
        return ""
-    command_search = re.search(r"\[([^\]]*)\](.*)", commit_message)
+    message = message.strip().lower()
-    if command_search is None:
+
    # run-slow: model_1, model_2
    if not message.startswith(("run-slow", "run_slow", "run slow")):
        return ""
    message = message[len("run slow") :]
    # remove leading `:`
    while message.strip().startswith(":"):
        message = message.strip()[1:]
-    command = command_search.groups()[0]
+    return message
    command = command.lower().replace("-", " ").replace("_", " ")
    run_slow = command == "run slow"
    if run_slow:
        models = command_search.groups()[1].strip()
        return models
    else:
        return ""
-def get_models(commit_message: str):
+def get_models(message: str):
-    models = parse_commit_message(commit_message)
+    models = parse_message(message)
-    return [f"models/{x}" for x in models.replace(",", " ").split()]
+    return models.replace(",", " ").split()
 def check_model_names(model_name: str):
    allowed = string.ascii_letters + string.digits + "_"
    return not (model_name.startswith("_") or model_name.endswith("_")) and all(c in allowed for c in model_name)
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument("--commit_message", type=str, default="", help="The commit message.")
+    parser.add_argument("--message", type=str, default="", help="The content of a comment.")
    args = parser.parse_args()
    new_model = get_new_model()
-    specified_models = get_models(args.commit_message)
+    specified_models = get_models(args.message)
    models = ([] if new_model == "" else [new_model]) + specified_models
    # a guard for strange model names
    models = [model for model in models if check_model_names(model)]
    # Add "models/"
    models = [f"models/{model}" for model in models]
    print(sorted(set(models)))