Add offline runners info in the Slack report (#19169)

* send slack report for offline runners

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar 2022-09-23 19:23:05 +02:00 committed by GitHub
parent 49bf569830
commit 0cea8d5555
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 3 deletions

View File

@ -19,6 +19,8 @@ jobs:
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-latest
outputs:
offline_runners: ${{ steps.set-offline_runners.outputs.offline_runners }}
steps:
- name: Checkout transformers
uses: actions/checkout@v2
@ -26,7 +28,14 @@ jobs:
fetch-depth: 2
- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners single-gpu-ci-runner-docker,multi-gpu-ci-runner-docker,single-gpu-scheduled-ci-runner-docker,multi-scheduled-scheduled-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
run: python utils/check_self_hosted_runner.py --target_runners single-gpu-ci-runner-docker,multi-gpu-ci-runner-docker,single-gpu-scheduled-ci-runner-docker,multi-scheduled-scheduled-ci-runner-docker,single-gpu-doctest-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
- id: set-offline_runners
name: Set output for offline runners
if: ${{ always() }}
run: |
offline_runners=$(python3 -c 'fp = open("offline_runners.txt"); failed = fp.read(); fp.close(); print(failed)')
echo "::set-output name=offline_runners::$offline_runners"
send_results:
name: Send results to webhook
@ -50,6 +59,7 @@ jobs:
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_EVENT: runner status check
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
OFFLINE_RUNNERS: ${{ needs.check_runner_status.outputs.offline_runners }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |

View File

@ -5,6 +5,8 @@ import subprocess
def get_runner_status(target_runners, token):
offline_runners = []
cmd = (
f'curl -H "Accept: application/vnd.github+json" -H "Authorization: Bearer {token}"'
" https://api.github.com/repos/huggingface/transformers/actions/runners"
@ -17,7 +19,15 @@ def get_runner_status(target_runners, token):
for runner in runners:
if runner["name"] in target_runners:
if runner["status"] == "offline":
raise ValueError(f"{runner['name']} is offline!")
offline_runners.append(runner)
# save the result so we can report them on Slack
with open("offline_runners.txt", "w") as fp:
fp.write(json.dumps(offline_runners))
if len(offline_runners) > 0:
failed = "\n".join(offline_runners)
raise ValueError(f"The following runners are offline:\n{failed}")
if __name__ == "__main__":

View File

@ -397,8 +397,12 @@ class Message:
ci_title_block = {"type": "section", "text": {"type": "mrkdwn", "text": ci_title}}
blocks.append(ci_title_block)
offline_runners = []
if runner_not_available:
text = "💔 CI runners are not available! Tests are not run. 😭"
result = os.environ.get("OFFLINE_RUNNERS")
if result is not None:
offline_runners = json.loads(result)
elif runner_failed:
text = "💔 CI runners have problems! Tests are not run. 😭"
elif setup_failed:
@ -413,11 +417,18 @@ class Message:
"text": text,
},
}
text = ""
if len(offline_runners) > 0:
text = "\n" + "\n".join(offline_runners)
text = f"The following runners are offline:\n{text}\n\n"
text += "🙏 Let's fix it ASAP! 🙏"
error_block_2 = {
"type": "section",
"text": {
"type": "plain_text",
"text": "🙏 Let's fix it ASAP! 🙏",
"text": text,
},
"accessory": {
"type": "button",