name: Self-hosted runner (scheduled)

on:
  push:
    branches:
      - multi_ci_*
  repository_dispatch:
  schedule:
    - cron: "0 0 * * *"
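
# The cron entry runs the suite once a day at 00:00 UTC; pushes to multi_ci_*
# branches additionally let the workflow itself be exercised without waiting for
# the nightly. `repository_dispatch` (no `types` filter, so any event name works)
# allows triggering through the REST API, e.g. (illustrative sketch; the token
# variable and event name are placeholders):
#   curl -X POST -H "Authorization: token $GH_TOKEN" \
#     https://api.github.com/repos/huggingface/transformers/dispatches \
#     -d '{"event_type": "nightly-ci"}'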

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  RUN_SLOW: yes
  OMP_NUM_THREADS: 16
  MKL_NUM_THREADS: 16
  PYTEST_TIMEOUT: 600
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
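
# RUN_SLOW=yes enables the tests decorated with @slow that the per-commit CI skips;
# PYTEST_TIMEOUT caps each test at 600 s via pytest-timeout; HF_HOME points the
# Hugging Face cache at the volume mounted into every container below so model
# downloads persist across runs. The SigOpt token is used by the hyperparameter
# search integration tests.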

jobs:
  run_all_tests_torch_gpu:
    runs-on: [self-hosted, docker-gpu, single-gpu]
    container:
      image: pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          apt -y update && apt install -y libsndfile1-dev git
          pip install --upgrade pip
          pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
          pip install https://github.com/kpu/kenlm/archive/master.zip
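          # kenlm is installed straight from GitHub; it provides the n-gram LM
          # support behind the pyctcdecode speech (Wav2Vec2-with-LM) tests.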

      - name: Are GPUs recognized by our DL frameworks
        run: |
          utils/print_env_pt.py

      - name: Run all tests on GPU
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_gpu tests
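
      # `-n 1 --dist=loadfile` comes from pytest-xdist: a single worker, with all
      # tests from one file kept on the same worker. `--make-reports=<id>` is added
      # by the repo's own conftest.py and writes the reports/<id>_*.txt files that
      # the "Failure short reports" steps below cat.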

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_gpu_failures_short.txt
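
      # `if: ${{ always() }}` keeps the reporting and upload steps running even when
      # an earlier test step failed, so every suite still surfaces its reports.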

      - name: Run examples tests on GPU
        if: ${{ always() }}
        env:
          OMP_NUM_THREADS: 16
          MKL_NUM_THREADS: 16
          RUN_SLOW: yes
          HF_HOME: /mnt/cache
          TRANSFORMERS_IS_CI: yes
        run: |
          pip install -r examples/pytorch/_tests_requirements.txt
          python -m pytest -n 1 -v --dist=loadfile --make-reports=examples_torch_gpu examples

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/examples_torch_gpu_failures_short.txt

      - name: Run all pipeline tests on GPU
        if: ${{ always() }}
        env:
          RUN_PIPELINE_TESTS: yes
        run: |
          python -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_pipeline_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_all_tests_torch_gpu_test_reports
          path: reports

  run_all_tests_flax_gpu:
    runs-on: [self-hosted, docker-gpu-test, single-gpu]
    container:
      image: tensorflow/tensorflow:2.4.1-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        continue-on-error: true
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
          pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
          pip install https://github.com/kpu/kenlm/archive/master.zip
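          # `jax[cuda111]` selects the CUDA 11.1 jaxlib wheels; `-f` adds Google's
          # release index as a find-links source, since those wheels are not on PyPI.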

      - name: Are GPUs recognized by our DL frameworks
        run: |
          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"

      - name: Run all tests on GPU
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_flax_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_all_tests_flax_gpu_test_reports
          path: reports

  run_all_tests_tf_gpu:
    runs-on: [self-hosted, docker-gpu, single-gpu]
    container:
      image: tensorflow/tensorflow:2.4.1-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          apt -y update && apt install -y libsndfile1-dev git
          pip install --upgrade pip
          pip install .[sklearn,testing,onnx,sentencepiece,tf-speech,vision]
          pip install https://github.com/kpu/kenlm/archive/master.zip

      - name: Are GPUs recognized by our DL frameworks
        run: |
          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
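          # TF_CPP_MIN_LOG_LEVEL=3 silences TensorFlow's C++ startup logging so that
          # only the two print() lines reach the job log.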

      - name: Run all tests on GPU
        env:
          TF_NUM_INTEROP_THREADS: 1
          TF_NUM_INTRAOP_THREADS: 16
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_tf_gpu tests
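
      # Pinning the inter-op pool to 1 thread and the intra-op pool to 16 (matching
      # OMP_NUM_THREADS/MKL_NUM_THREADS above) keeps a single test from
      # oversubscribing the runner's CPU cores.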

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_tf_gpu_failures_short.txt

      - name: Run all pipeline tests on GPU
        if: ${{ always() }}
        env:
          RUN_PIPELINE_TESTS: yes
          TF_NUM_INTEROP_THREADS: 1
          TF_NUM_INTRAOP_THREADS: 16
        run: |
          python -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_tf_pipeline_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_all_tests_tf_gpu_test_reports
          path: reports

  run_all_examples_torch_xla_tpu:
    runs-on: [self-hosted, docker-tpu-test, tpu-v3-8]
    container:
      image: gcr.io/tpu-pytorch/xla:nightly_3.8_tpuvm
      options: --privileged -v "/lib/libtpu.so:/lib/libtpu.so" -v /mnt/cache/.cache/huggingface:/mnt/cache/ --shm-size 16G
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install .[testing]

      - name: Are TPUs recognized by our DL frameworks
        env:
          XRT_TPU_CONFIG: "localservice;0;localhost:51011"
        run: |
          python -c "import torch_xla.core.xla_model as xm; print(xm.xla_device())"
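
      # XRT_TPU_CONFIG points torch_xla's XRT runtime at the local TPU service on
      # port 51011; with it set, xm.xla_device() resolves to a TPU device instead of
      # falling back to CPU.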

      - name: Run example tests on TPU
        env:
          XRT_TPU_CONFIG: "localservice;0;localhost:51011"
          MKL_SERVICE_FORCE_INTEL: "1"  # See: https://github.com/pytorch/pytorch/issues/37377
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_xla_tpu examples/pytorch/test_xla_examples.py

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_xla_tpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_all_examples_torch_xla_tpu
          path: reports

  run_all_tests_torch_multi_gpu:
    runs-on: [self-hosted, docker-gpu, multi-gpu]
    container:
      image: pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        continue-on-error: true
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          apt -y update && apt install -y libsndfile1-dev git
          pip install --upgrade pip
          pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
          pip install https://github.com/kpu/kenlm/archive/master.zip

      - name: Are GPUs recognized by our DL frameworks
        run: |
          utils/print_env_pt.py

      - name: Run all tests on GPU
        env:
          MKL_SERVICE_FORCE_INTEL: 1
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_multi_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_multi_gpu_failures_short.txt

      - name: Run all pipeline tests on GPU
        if: ${{ always() }}
        env:
          RUN_PIPELINE_TESTS: yes
        run: |
          python -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_pipeline_multi_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_all_tests_torch_multi_gpu_test_reports
          path: reports

  run_all_tests_tf_multi_gpu:
    runs-on: [self-hosted, docker-gpu, multi-gpu]
    container:
      image: tensorflow/tensorflow:2.4.1-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        continue-on-error: true
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          apt -y update && apt install -y libsndfile1-dev git
          pip install --upgrade pip
          pip install .[sklearn,testing,onnx,sentencepiece,tf-speech,vision]
          pip install https://github.com/kpu/kenlm/archive/master.zip

      - name: Are GPUs recognized by our DL frameworks
        run: |
          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
          TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"

      - name: Run all tests on GPU
        env:
          TF_NUM_INTEROP_THREADS: 1
          TF_NUM_INTRAOP_THREADS: 16
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_tf_multi_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_tf_multi_gpu_failures_short.txt

      - name: Run all pipeline tests on GPU
        if: ${{ always() }}
        env:
          RUN_PIPELINE_TESTS: yes
          TF_NUM_INTEROP_THREADS: 1
          TF_NUM_INTRAOP_THREADS: 16
        run: |
          python -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_tf_pipeline_multi_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_all_tests_tf_multi_gpu_test_reports
          path: reports

#  run_all_tests_flax_multi_gpu:
#    runs-on: [self-hosted, docker-gpu, multi-gpu]
#    container:
#      image: tensorflow/tensorflow:2.4.1-gpu
#      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
#    steps:
#      - name: Launcher docker
#        uses: actions/checkout@v2
#
#      - name: NVIDIA-SMI
#        run: |
#          nvidia-smi
#
#      - name: Install dependencies
#        run: |
#          pip install --upgrade pip
#          pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
#          pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
#
#      - name: Are GPUs recognized by our DL frameworks
#        run: |
#          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
#          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
#
#      - name: Run all tests on GPU
#        run: |
#          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests
#
#      - name: Failure short reports
#        if: ${{ always() }}
#        run: cat reports/tests_flax_gpu_failures_short.txt
#
#      - name: Test suite reports artifacts
#        if: ${{ always() }}
#        uses: actions/upload-artifact@v2
#        with:
#          name: run_all_tests_flax_gpu_test_reports
#          path: reports

  run_all_tests_torch_cuda_extensions_gpu:
    runs-on: [self-hosted, docker-gpu, single-gpu]
    container:
      image: nvcr.io/nvidia/pytorch:21.03-py3
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          apt -y update && apt install -y libaio-dev
          pip install --upgrade pip
          pip install .[testing,deepspeed]

      - name: Are GPUs recognized by our DL frameworks
        run: |
          utils/print_env_pt.py

      - name: Run all tests on GPU
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_cuda_extensions_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_tests_torch_cuda_extensions_gpu_test_reports
          path: reports

  run_all_tests_torch_cuda_extensions_multi_gpu:
    runs-on: [self-hosted, docker-gpu, multi-gpu]
    container:
      image: nvcr.io/nvidia/pytorch:21.03-py3
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Launcher docker
        uses: actions/checkout@v2

      - name: NVIDIA-SMI
        continue-on-error: true
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          apt -y update && apt install -y libaio-dev
          pip install --upgrade pip
          rm -rf ~/.cache/torch_extensions/  # shared between conflicting builds
          pip install .[testing,deepspeed,fairscale]
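          # DeepSpeed JIT-compiles its CUDA ops into ~/.cache/torch_extensions/ on
          # first use; clearing the directory up front avoids reusing artifacts built
          # against a different torch/CUDA combination.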

      - name: Are GPUs recognized by our DL frameworks
        run: |
          utils/print_env_pt.py

      - name: Run all tests on GPU
        run: |
          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_torch_cuda_extensions_multi_gpu tests/deepspeed tests/extended

      - name: Failure short reports
        if: ${{ always() }}
        run: cat reports/tests_torch_cuda_extensions_multi_gpu_failures_short.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: run_tests_torch_cuda_extensions_multi_gpu_test_reports
          path: reports

  send_results:
    name: Send results to webhook
    runs-on: ubuntu-latest
    if: always()
    needs: [
      run_all_tests_torch_gpu,
      run_all_tests_tf_gpu,
      run_all_tests_torch_multi_gpu,
      run_all_tests_tf_multi_gpu,
      run_all_tests_torch_cuda_extensions_gpu,
      run_all_tests_torch_cuda_extensions_multi_gpu
    ]
    steps:
      - uses: actions/checkout@v2

      - uses: actions/download-artifact@v2

      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
        run: |
          pip install slack_sdk
          python utils/notification_service.py scheduled
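
      # `actions/download-artifact@v2` with no `name:` fetches every report artifact
      # uploaded above; notification_service.py then summarizes them and posts to
      # Slack via slack_sdk. The `scheduled` argument presumably routes the message
      # to CI_SLACK_CHANNEL_ID_DAILY rather than the default channel.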