mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 13:20:12 +06:00
Update daily ci to use new cluster (#33627)
* update * re-enable daily CI --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
077b552f07
commit
75c878da1e
36
.github/workflows/model_jobs.yml
vendored
36
.github/workflows/model_jobs.yml
vendored
@ -41,7 +41,8 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
|
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
|
||||||
runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ inputs.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: ${{ inputs.docker }}
|
image: ${{ inputs.docker }}
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -97,25 +98,42 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
|
- name: Set `machine_type` for report and artifact names
|
||||||
|
working-directory: /transformers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "${{ inputs.machine_type }}"
|
||||||
|
|
||||||
|
if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||||
|
machine_type=single-gpu
|
||||||
|
elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||||
|
machine_type=multi-gpu
|
||||||
|
else
|
||||||
|
machine_type=${{ inputs.machine_type }}
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||||
|
|
||||||
- name: Run test
|
- name: Run test
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||||
echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
|
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
|
||||||
echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
|
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
|
||||||
|
|
||||||
- name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
|
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||||
|
3
.github/workflows/self-scheduled-caller.yml
vendored
3
.github/workflows/self-scheduled-caller.yml
vendored
@ -2,6 +2,9 @@ name: Self-hosted runner (scheduled)
|
|||||||
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
repository_dispatch:
|
||||||
|
schedule:
|
||||||
|
- cron: "17 2 * * *"
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- run_scheduled_ci*
|
- run_scheduled_ci*
|
||||||
|
167
.github/workflows/self-scheduled.yml
vendored
167
.github/workflows/self-scheduled.yml
vendored
@ -50,8 +50,9 @@ jobs:
|
|||||||
name: Setup
|
name: Setup
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
machine_type: [single-gpu, multi-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ matrix.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -102,7 +103,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machine_type: [single-gpu, multi-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||||
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
|
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
|
||||||
uses: ./.github/workflows/model_jobs.yml
|
uses: ./.github/workflows/model_jobs.yml
|
||||||
with:
|
with:
|
||||||
@ -119,8 +120,9 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machine_type: [single-gpu, multi-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ matrix.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-pytorch-gpu
|
image: huggingface/transformers-pytorch-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -146,22 +148,39 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
|
- name: Set `machine_type` for report and artifact names
|
||||||
|
working-directory: /transformers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "${{ matrix.machine_type }}"
|
||||||
|
|
||||||
|
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||||
|
machine_type=single-gpu
|
||||||
|
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||||
|
machine_type=multi-gpu
|
||||||
|
else
|
||||||
|
machine_type=${{ matrix.machine_type }}
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Run all pipeline tests on GPU
|
- name: Run all pipeline tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
|
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
|
||||||
|
|
||||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
|
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
|
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
|
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
|
||||||
|
|
||||||
run_pipelines_tf_gpu:
|
run_pipelines_tf_gpu:
|
||||||
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
|
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
|
||||||
@ -169,8 +188,9 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machine_type: [single-gpu, multi-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ matrix.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-tensorflow-gpu
|
image: huggingface/transformers-tensorflow-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -197,22 +217,39 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
|
- name: Set `machine_type` for report and artifact names
|
||||||
|
working-directory: /transformers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "${{ matrix.machine_type }}"
|
||||||
|
|
||||||
|
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||||
|
machine_type=single-gpu
|
||||||
|
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||||
|
machine_type=multi-gpu
|
||||||
|
else
|
||||||
|
machine_type=${{ matrix.machine_type }}
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Run all pipeline tests on GPU
|
- name: Run all pipeline tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
|
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
run: |
|
run: |
|
||||||
cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
|
cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
|
||||||
|
|
||||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports"
|
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports"
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
|
name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
|
path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
|
||||||
|
|
||||||
run_examples_gpu:
|
run_examples_gpu:
|
||||||
if: ${{ inputs.job == 'run_examples_gpu' }}
|
if: ${{ inputs.job == 'run_examples_gpu' }}
|
||||||
@ -220,8 +257,9 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machine_type: [single-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache]
|
||||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ matrix.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -247,23 +285,40 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
|
- name: Set `machine_type` for report and artifact names
|
||||||
|
working-directory: /transformers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "${{ matrix.machine_type }}"
|
||||||
|
|
||||||
|
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||||
|
machine_type=single-gpu
|
||||||
|
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||||
|
machine_type=multi-gpu
|
||||||
|
else
|
||||||
|
machine_type=${{ matrix.machine_type }}
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Run examples tests on GPU
|
- name: Run examples tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
pip install -r examples/pytorch/_tests_requirements.txt
|
pip install -r examples/pytorch/_tests_requirements.txt
|
||||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch
|
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
|
||||||
|
|
||||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
|
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
|
name: ${{ env.machine_type }}_run_examples_gpu_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
|
path: /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports
|
||||||
|
|
||||||
run_torch_cuda_extensions_gpu:
|
run_torch_cuda_extensions_gpu:
|
||||||
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
|
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
|
||||||
@ -271,8 +326,9 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machine_type: [single-gpu, multi-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ matrix.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: ${{ inputs.docker }}
|
image: ${{ inputs.docker }}
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -326,22 +382,39 @@ jobs:
|
|||||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
|
- name: Set `machine_type` for report and artifact names
|
||||||
|
working-directory: /transformers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "${{ matrix.machine_type }}"
|
||||||
|
|
||||||
|
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||||
|
machine_type=single-gpu
|
||||||
|
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||||
|
machine_type=multi-gpu
|
||||||
|
else
|
||||||
|
machine_type=${{ matrix.machine_type }}
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
|
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
|
run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
|
||||||
|
|
||||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
|
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||||
path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||||
|
|
||||||
run_quantization_torch_gpu:
|
run_quantization_torch_gpu:
|
||||||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||||
@ -352,8 +425,9 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
|
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
|
||||||
machine_type: [single-gpu, multi-gpu]
|
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
|
runs-on:
|
||||||
|
group: '${{ matrix.machine_type }}'
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-quantization-latest-gpu
|
image: huggingface/transformers-quantization-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@ -388,22 +462,39 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: pip freeze
|
run: pip freeze
|
||||||
|
|
||||||
|
- name: Set `machine_type` for report and artifact names
|
||||||
|
working-directory: /transformers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "${{ matrix.machine_type }}"
|
||||||
|
|
||||||
|
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
|
||||||
|
machine_type=single-gpu
|
||||||
|
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
|
||||||
|
machine_type=multi-gpu
|
||||||
|
else
|
||||||
|
machine_type=${{ matrix.machine_type }}
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Run quantization tests on GPU
|
- name: Run quantization tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||||
|
|
||||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
|
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
|
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
|
||||||
|
|
||||||
run_extract_warnings:
|
run_extract_warnings:
|
||||||
# Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
|
# Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
|
||||||
|
Loading…
Reference in New Issue
Block a user