[ci] Run all tests on (self-hosted) GPU (#3020)

* Create self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * do not run slow tests, for now * [ci] For comparison with circleci, let's also run CPU-tests * [ci] reorganize * clearer filenames * [ci] Final tweaks before merging * rm slow tests on circle ci * Trigger CI * On GPU this concurrency was way too high
2025-07-31 10:12:23 +06:00 · 2020-02-28 21:11:08 -05:00 · 2020-02-28 21:11:08 -05:00 · e36bd94345
commit e36bd94345
parent 908fa43b54
4 changed files with 116 additions and 16 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -14,22 +14,6 @@ jobs:
            - run: sudo pip install codecov pytest-cov
            - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
            - run: codecov
-    run_all_tests_torch_and_tf:
-        working_directory: ~/transformers
-        docker:
-            - image: circleci/python:3.5
-        environment:
-            OMP_NUM_THREADS: 1
-            RUN_SLOW: yes
-            RUN_CUSTOM_TOKENIZERS: yes
-        resource_class: xlarge
-        parallelism: 1
-        steps:
-            - checkout
-            - run: sudo pip install .[mecab,sklearn,tf-cpu,torch,testing]
-            - run:
-                command: python -m pytest -n 8 --dist=loadfile -s -v ./tests/
-                no_output_timeout: 4h

    run_tests_torch:
        working_directory: ~/transformers
--- a/.github/workflows/github-push.yml
+++ b/.github/workflows/github-push.yml
@ -0,0 +1,19 @@
+# Runs on a GitHub-hosted machine on every push.
+name: GitHub-hosted runner
+
+on: push
+
+jobs:
+  # NOTE(review): this job only installs the `quality` extras; no step runs
+  # a linter or formatter yet -- confirm where the actual checks happen.
+  check_code_quality:
+    runs-on: ubuntu-18.04
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v1
+      with:
+        python-version: "3.7"  # quoted: a bare 3.10 would load as float 3.1
+    - name: Install dependencies
+      run: |
+        pip install ".[tf,torch,quality]"
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@ -0,0 +1,47 @@
+name: Self-hosted runner (push)
+
+on:
+  push:
+    branches:
+      - master
+  # NOTE(review): this runs pull-request code on the self-hosted machine, which
+  # GitHub advises against for public repos -- confirm forks cannot trigger it.
+  pull_request:
+
+jobs:
+  # Installs the package into a venv, then runs ./tests/ with USE_CUDA set.
+  run_tests_torch_and_tf_gpu:
+    runs-on: self-hosted
+    steps:
+    - uses: actions/checkout@v2
+    - name: Python version
+      run: |
+        which python
+        python --version
+        pip --version
+    - name: Current dir
+      run: pwd
+    - run: nvidia-smi
+    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+      run: |
+        python -m venv .env
+        source .env/bin/activate
+        which python
+        python --version
+        pip --version
+    - name: Install dependencies
+      run: |
+        source .env/bin/activate
+        pip install ".[sklearn,tf,torch,testing]"
+    - name: Are GPUs recognized by our DL frameworks
+      run: |
+        source .env/bin/activate
+        python -c "import torch; print(torch.cuda.is_available())"
+        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
+    - name: Run all non-slow tests on GPU
+      env:
+        OMP_NUM_THREADS: 1
+        USE_CUDA: "yes"  # quoted so generic YAML loaders keep the string
+      run: |
+        source .env/bin/activate
+        python -m pytest -n 2 --dist=loadfile -s -v ./tests/
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -0,0 +1,50 @@
+name: Self-hosted runner (scheduled)
+
+on:
+  push:
+    branches:
+      - ci_*
+  repository_dispatch:
+  schedule:
+    # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
+    - cron: "0 0 * * *"
+
+jobs:
+  # Installs the package into a venv and runs ./tests/ with RUN_SLOW and
+  # USE_CUDA set, using a single pytest-xdist worker (-n 1).
+  run_all_tests_torch_and_tf_gpu:
+    runs-on: self-hosted
+    steps:
+    - uses: actions/checkout@v2
+    - name: Python version
+      run: |
+        which python
+        python --version
+        pip --version
+    - name: Current dir
+      run: pwd
+    - run: nvidia-smi
+    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+      run: |
+        python -m venv .env
+        source .env/bin/activate
+        which python
+        python --version
+        pip --version
+    - name: Install dependencies
+      run: |
+        source .env/bin/activate
+        pip install ".[sklearn,tf,torch,testing]"
+    - name: Are GPUs recognized by our DL frameworks
+      run: |
+        source .env/bin/activate
+        python -c "import torch; print(torch.cuda.is_available())"
+        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
+    - name: Run all tests on GPU
+      env:
+        OMP_NUM_THREADS: 1
+        RUN_SLOW: "yes"  # quoted so generic YAML loaders keep the string
+        USE_CUDA: "yes"  # quoted so generic YAML loaders keep the string
+      run: |
+        source .env/bin/activate
+        python -m pytest -n 1 --dist=loadfile -s -v ./tests/