diff --git a/.github/workflows/self-scheduled-amd-caller.yml b/.github/workflows/self-scheduled-amd-caller.yml
index 4755bd86824..dc5c7b7e905 100644
--- a/.github/workflows/self-scheduled-amd-caller.yml
+++ b/.github/workflows/self-scheduled-amd-caller.yml
@@ -3,23 +3,13 @@ name: Self-hosted runner (AMD scheduled CI caller)
 on:
   schedule:
     - cron: "17 2 * * *"
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
 
 jobs:
-  run_amd_ci_mi210:
-    name: AMD mi210
-    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      gpu_flavor: mi210
-    secrets: inherit
-
-  run_amd_ci_mi250:
-    name: AMD mi250
-    if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
-    uses: ./.github/workflows/self-scheduled-amd.yml
-    with:
-      gpu_flavor: mi250
-    secrets: inherit
+  # No-op job: completing this workflow starts the per-flavor AMD callers via their `workflow_run` trigger.
+  run_scheduled_amd_ci:
+    name: Trigger Scheduled AMD CI
+    runs-on: ubuntu-22.04
+    if: ${{ always() }}
+    steps:
+      - name: Trigger scheduled AMD CI via workflow_run
+        run: echo "Trigger scheduled AMD CI via workflow_run"
diff --git a/.github/workflows/self-scheduled-amd-mi210-caller.yml b/.github/workflows/self-scheduled-amd-mi210-caller.yml
new file mode 100644
index 00000000000..ceaba454ae6
--- /dev/null
+++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml
@@ -0,0 +1,21 @@
+name: Self-hosted runner (AMD mi210 scheduled CI caller)
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_scheduled_ci_caller*
+
+jobs:
+  run_amd_ci:
+    name: AMD mi210
+    # Runs after the scheduled caller workflow completes (`workflow_run`) or on a push to a
+    # `run_amd_scheduled_ci_caller*` branch; `schedule` is not one of this workflow's triggers.
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      gpu_flavor: mi210
+    secrets: inherit
diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml
new file mode 100644
index 00000000000..843e3476342
--- /dev/null
+++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml
@@ -0,0 +1,21 @@
+name: Self-hosted runner (AMD mi250 scheduled CI caller)
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_scheduled_ci_caller*
+
+jobs:
+  run_amd_ci:
+    name: AMD mi250
+    # Runs after the scheduled caller workflow completes (`workflow_run`) or on a push to a
+    # `run_amd_scheduled_ci_caller*` branch; `schedule` is not one of this workflow's triggers.
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      gpu_flavor: mi250
+    secrets: inherit
diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml
index 17e907e40a5..ef1c4ddaa07 100644
--- a/.github/workflows/self-scheduled-amd.yml
+++ b/.github/workflows/self-scheduled-amd.yml
@@ -438,7 +438,7 @@ jobs:
           CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
           CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
           ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_EVENT: Scheduled CI (AMD)
+          CI_EVENT: Scheduled CI (AMD) - ${{ inputs.gpu_flavor }}
           CI_SHA: ${{ github.sha }}
           CI_WORKFLOW_REF: ${{ github.workflow_ref }}
           RUNNER_STATUS: ${{ needs.check_runner_status.result }}
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 548d3a9b2d8..17ce31c59a0 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -119,10 +119,17 @@ class Message:
         # Failures and success of the additional tests
         self.n_additional_success = sum(r["success"] for r in additional_results.values())
 
-        all_additional_failures = dicts_to_sum([r["failed"] for r in additional_results.values()])
-        self.n_additional_single_gpu_failures = all_additional_failures["single"]
-        self.n_additional_multi_gpu_failures = all_additional_failures["multi"]
-        self.n_additional_unknown_gpu_failures = all_additional_failures["unclassified"]
+        if len(additional_results) > 0:
+            # `dicts_to_sum` requires a non-empty input, so only call it when there are additional results.
+            all_additional_failures = dicts_to_sum([r["failed"] for r in additional_results.values()])
+            self.n_additional_single_gpu_failures = all_additional_failures["single"]
+            self.n_additional_multi_gpu_failures = all_additional_failures["multi"]
+            self.n_additional_unknown_gpu_failures = all_additional_failures["unclassified"]
+        else:
+            self.n_additional_single_gpu_failures = 0
+            self.n_additional_multi_gpu_failures = 0
+            self.n_additional_unknown_gpu_failures = 0
+
         self.n_additional_failures = (
             self.n_additional_single_gpu_failures
             + self.n_additional_multi_gpu_failures
@@ -903,6 +910,9 @@ if __name__ == "__main__":
     elif ci_event.startswith("Push CI (AMD) - "):
         flavor = ci_event.replace("Push CI (AMD) - ", "")
         job_name_prefix = f"AMD {flavor}"
+    elif ci_event.startswith("Scheduled CI (AMD) - "):
+        flavor = ci_event.replace("Scheduled CI (AMD) - ", "")
+        job_name_prefix = f"AMD {flavor}"
 
     for model in model_results.keys():
         for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths: