Disable Flax GPU tests on push (#17042)

This commit is contained in:
Sylvain Gugger 2022-05-02 10:25:53 -04:00 committed by GitHub
parent bdd690a74d
commit 4be8b95a9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -76,66 +76,66 @@ jobs:
name: run_all_tests_torch_gpu_test_reports name: run_all_tests_torch_gpu_test_reports
path: reports path: reports
run_tests_flax_gpu: # run_tests_flax_gpu:
runs-on: [self-hosted, docker-gpu-test, single-gpu] # runs-on: [self-hosted, docker-gpu-test, single-gpu]
container: # container:
image: tensorflow/tensorflow:2.4.1-gpu # image: tensorflow/tensorflow:2.4.1-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ # options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: # steps:
- name: Set up Python 3.7 # - name: Set up Python 3.7
uses: actions/setup-python@v2 # uses: actions/setup-python@v2
with: # with:
python-version: 3.7 # python-version: 3.7
#
- name: Install dependencies # - name: Install dependencies
run: | # run: |
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng # apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng
pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html # pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
pip install --upgrade pip # pip install --upgrade pip
pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision] # pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
#
- name: Launcher docker # - name: Launcher docker
uses: actions/checkout@v2 # uses: actions/checkout@v2
with: # with:
fetch-depth: 2 # fetch-depth: 2
#
- name: NVIDIA-SMI # - name: NVIDIA-SMI
continue-on-error: true # continue-on-error: true
run: | # run: |
nvidia-smi # nvidia-smi
#
- name: Are GPUs recognized by our DL frameworks # - name: Are GPUs recognized by our DL frameworks
run: | # run: |
python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)" # python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))" # python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
#
- name: Fetch the tests to run # - name: Fetch the tests to run
run: | # run: |
python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt # python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
#
- name: Report fetched tests # - name: Report fetched tests
uses: actions/upload-artifact@v2 # uses: actions/upload-artifact@v2
with: # with:
name: test_fetched # name: test_fetched
path: test_preparation.txt # path: test_preparation.txt
#
- name: Run all non-slow tests on GPU # - name: Run all non-slow tests on GPU
run: | # run: |
if [ -f test_list.txt ]; then # if [ -f test_list.txt ]; then
python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt) # python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
fi # fi
#
- name: Failure short reports # - name: Failure short reports
if: ${{ failure() }} # if: ${{ failure() }}
run: cat reports/tests_flax_gpu_failures_short.txt # run: cat reports/tests_flax_gpu_failures_short.txt
#
- name: Test suite reports artifacts # - name: Test suite reports artifacts
if: ${{ always() }} # if: ${{ always() }}
uses: actions/upload-artifact@v2 # uses: actions/upload-artifact@v2
with: # with:
name: run_all_tests_flax_gpu_test_reports # name: run_all_tests_flax_gpu_test_reports
path: reports # path: reports
#
# run_tests_tf_gpu: # run_tests_tf_gpu:
# runs-on: [self-hosted, docker-gpu, single-gpu] # runs-on: [self-hosted, docker-gpu, single-gpu]
# timeout-minutes: 120 # timeout-minutes: 120