diff --git a/.github/workflows/self-new-model-pr-caller.yml b/.github/workflows/self-new-model-pr-caller.yml new file mode 100644 index 00000000000..6af7cc71d91 --- /dev/null +++ b/.github/workflows/self-new-model-pr-caller.yml @@ -0,0 +1,125 @@ +name: PR slow CI + +on: + pull_request: + +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + RUN_SLOW: yes + # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. + # This token is created under the bot `hf-transformers-bot`. + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true + RUN_PT_TF_CROSS_TESTS: 1 + CUDA_VISIBLE_DEVICES: 0,1 + +jobs: + check_for_new_model: + runs-on: ubuntu-22.04 + name: Check if a PR is a new model PR + outputs: + new_model: ${{ steps.check_new_model.outputs.new_model }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: "0" + + - name: Check if there is a new model + id: check_new_model + run: | + python -m pip install GitPython + echo "new_model=$(python utils/check_if_new_model_added.py | tail -n 1)" >> $GITHUB_OUTPUT + + run_new_model_tests: + name: Run all tests for the new model + # Triggered if it is a new model PR and the required label is added + if: ${{ needs.check_for_new_model.outputs.new_model != '' && contains(github.event.pull_request.labels.*.name, 'single-model-run-slow') }} + needs: check_for_new_model + strategy: + fail-fast: false + matrix: + folders: ["${{ needs.check_for_new_model.outputs.new_model }}"] + machine_type: [single-gpu, multi-gpu] + runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + container: + image: huggingface/transformers-all-latest-gpu + options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Echo input and matrix info + shell: bash + run: | + echo "${{ matrix.folders }}" + + - name: Echo folder ${{ matrix.folders }} + shell: bash + # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to + # set the artifact folder names (because the character `/` is not allowed). + run: | + echo "${{ matrix.folders }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'models/'/'models_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ github.event.pull_request.head.sha }} + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all tests on GPU + working-directory: /transformers + run: python3 -m pytest -v -rs --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt + + - name: Make sure report directory exists + shell: bash + run: | + mkdir -p /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} + echo "hello" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/hello.txt + echo "${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}" + + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} + + slow_test_result: + runs-on: ubuntu-22.04 + name: Check slow test status + needs: [check_for_new_model, run_new_model_tests] + if: always() + steps: + - name: Check test status + shell: bash + # NOT a new model PR --> pass + # new model PR --> pass only if `run_new_model_tests` gives `success` (so if the label is not added, we fail + # this job even if `run_new_model_tests` has `skipped` status). + run: | + echo "${{ needs.run_new_model_tests.result }}" + if [ "${{ needs.check_for_new_model.outputs.new_model }}" = "" ]; then echo "not new model"; elif [ "${{ needs.run_new_model_tests.result }}" != "success" ]; then echo "failure"; exit -1; else echo "pass"; fi; diff --git a/utils/check_if_new_model_added.py b/utils/check_if_new_model_added.py new file mode 100644 index 00000000000..14b040b2861 --- /dev/null +++ b/utils/check_if_new_model_added.py @@ -0,0 +1,96 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script is used to get the directory of the modeling file that is added in a pull request (i.e. a new model PR). + +Usage: + +```bash +python utils/check_if_new_model_added.py +``` +""" + +import re +from pathlib import Path +from typing import List + +from git import Repo + + +PATH_TO_REPO = Path(__file__).parent.parent.resolve() + + +def get_new_python_files_between_commits(base_commit: str, commits: List[str]) -> List[str]: + """ + Get the list of added python files between a base commit and one or several commits. + + Args: + repo (`git.Repo`): + A git repository (for instance the Transformers repo). + base_commit (`str`): + The commit reference of where to compare for the diff. This is the current commit, not the branching point! + commits (`List[str]`): + The list of commits with which to compare the repo at `base_commit` (so the branching point). + + Returns: + `List[str]`: The list of python files added between a base commit and one or several commits. + """ + code_diff = [] + for commit in commits: + for diff_obj in commit.diff(base_commit): + # We always add new python files + if diff_obj.change_type == "A" and diff_obj.b_path.endswith(".py"): + code_diff.append(diff_obj.b_path) + + return code_diff + + +def get_new_python_files() -> List[str]: + """ + Return a list of python files that have been added between the current head and the main branch. + + Returns: + `List[str]`: The list of python files added. + """ + repo = Repo(PATH_TO_REPO) + + try: + # For the cases where the main branch exists locally + main = repo.refs.main + except AttributeError: + # On GitHub Actions runners, it doesn't have local main branch + main = repo.remotes.origin.refs.main + + print(f"main is at {main.commit}") + print(f"Current head is at {repo.head.commit}") + + branching_commits = repo.merge_base(main, repo.head) + for commit in branching_commits: + print(f"Branching commit: {commit}") + return get_new_python_files_between_commits(repo.head.commit, branching_commits) + + +if __name__ == "__main__": + new_files = get_new_python_files() + reg = re.compile(r"src/transformers/(models/.*)/modeling_.*\.py") + + new_model = "" + for x in new_files: + find_new_model = reg.findall(x) + if len(find_new_model) > 0: + new_model = find_new_model[0] + # It's unlikely we have 2 new modeling files in a pull request. + break + print(new_model)