mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00

* Add a Dockerfile for PyTorch + ROCm based on official AMD released artifact * Add a new artifact single-amdgpu testing on main * Attempt to test the workflow without merging. * Changed BERT to check if things are triggered * Meet the dependencies graph on workflow * Revert BERT changes * Add check_runners_amdgpu to correctly mount and check availability * Rename setup to setup_gpu for CUDA and add setup_amdgpu for AMD * Fix all the needs.setup -> needs.setup_[gpu|amdgpu] dependencies * Fix setup dependency graph to use check_runner_amdgpu * Let's do the runner status check only on AMDGPU target * Update the Dockerfile.amd to put ourselves in / rather than /var/lib * Restore the whole setup for CUDA too. * Let's redisable them * Change BERT to trigger tests * Restore BERT * Add torchaudio with rocm 5.6 to AMD Dockerfile (#26050) fix dockerfile Co-authored-by: Felix Marty <felix@hf.co> * Place AMD GPU tests in a separate workflow (correct branch) (#26105) AMDGPU CI lives in an other workflow * Fix invalid job name is dependencies. * Remove tests multi-amdgpu for now. * Use single-amdgpu * Use --net=host for now. * Remote host networking. * Removed duplicated check_runners_amdgpu step * Let's tag machine-types with mi210 for now. * Machine type should be only mi210 * Remove unnecessary push.branches item * Apply review suggestions moving from `x-amdgpu` to `x-gpu` introducing `amd-gpu` and `miXXX` labels. * Remove amdgpu from step names. * finalize * delete --------- Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com> Co-authored-by: Felix Marty <felix@hf.co> Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
273 lines
8.7 KiB
YAML
273 lines
8.7 KiB
YAML
# Builds the project's Docker images and pushes them to DockerHub.
name: Build docker images (scheduled)

on:
  # Manual trigger: push to a branch whose name starts with `build_ci_docker_image`.
  push:
    branches:
      - 'build_ci_docker_image*'
  # No event-type filter is configured for repository dispatches.
  repository_dispatch:
  # Reusable-workflow entry point: the caller must pass the suffix that is
  # appended to the image tags (the jobs compare it against `-push-ci`).
  workflow_call:
    inputs:
      image_postfix:
        type: string
        required: true
  # Daily run at 00:17 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '17 0 * * *'

# Every run of this workflow joins the same concurrency group, and a newly
# triggered run never cancels one that is already in progress.
concurrency:
  group: 'docker-images-builds'
  cancel-in-progress: false

# One job per Docker image. Each job checks out the repository, logs in to
# DockerHub and builds + pushes the image from its `./docker/<image>` context.
#
# `inputs.image_postfix` is only set when this workflow is invoked through
# `workflow_call`; on `schedule` / `push` events it is empty, so the tags below
# get no suffix and every `inputs.image_postfix != '-push-ci'` condition is true.
#
# `actions/checkout` is pinned to v4 so that it runs on the same Node 20
# runtime as the `docker/*` actions used alongside it (v3 targets Node 16).
jobs:
  latest-docker:
    name: "Latest PyTorch + TensorFlow [dev]"
    runs-on: ubuntu-latest
    steps:
      # Free space on the hosted runner by deleting the Android and .NET
      # directories; the `ls` / `du` calls only log sizes before and after.
      - name: Cleanup disk
        run: |
          sudo ls -l /usr/local/lib/
          sudo ls -l /usr/share/
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/share/dotnet
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-all-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
      # Push CI images still need to be re-built daily
      - name: Build and push (for Push CI) in a daily basis
        # This condition allows `schedule` events, or `push` events that trigger
        # this workflow NOT via `workflow_call`.
        # The latter case is useful for manual image building for debugging
        # purposes. Use another tag in this case!
        if: inputs.image_postfix != '-push-ci'
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-all-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-all-latest-gpu-push-ci

  latest-torch-deepspeed-docker:
    name: "Latest PyTorch + DeepSpeed"
    runs-on: ubuntu-latest
    steps:
      - name: Cleanup disk
        run: |
          sudo ls -l /usr/local/lib/
          sudo ls -l /usr/share/
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/share/dotnet
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}

  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
  latest-torch-deepspeed-docker-for-push-ci-daily-build:
    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
    # Push CI images still need to be re-built daily.
    # This condition allows `schedule` events, or `push` events that trigger
    # this workflow NOT via `workflow_call`.
    # The latter case is useful for manual image building for debugging
    # purposes. Use another tag in this case!
    #
    # The condition sits on the job (as for `doc-builder`) rather than on the
    # single build step: when it is false there is nothing else to do, so the
    # job is skipped instead of cleaning the disk and logging in for nothing.
    if: inputs.image_postfix != '-push-ci'
    runs-on: ubuntu-latest
    steps:
      - name: Cleanup disk
        run: |
          sudo ls -l /usr/local/lib/
          sudo ls -l /usr/share/
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/share/dotnet
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push (for Push CI) in a daily basis
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci

  doc-builder:
    name: "Doc builder"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-doc-builder
          push: true
          tags: huggingface/transformers-doc-builder

  latest-pytorch:
    name: "Latest PyTorch [dev]"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on: ubuntu-latest
    steps:
      - name: Cleanup disk
        run: |
          sudo ls -l /usr/local/lib/
          sudo ls -l /usr/share/
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /usr/share/dotnet
          sudo du -sh /usr/local/lib/
          sudo du -sh /usr/share/
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-gpu

  latest-pytorch-amd:
    name: "Latest PyTorch (AMD) [dev]"
    # Runs on a self-hosted runner selected by these labels, not on a
    # GitHub-hosted one; there is no disk cleanup step in this job.
    runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-amd-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
      # Push CI images still need to be re-built daily
      - name: Build and push (for Push CI) in a daily basis
        # This condition allows `schedule` events, or `push` events that trigger
        # this workflow NOT via `workflow_call`.
        # The latter case is useful for manual image building for debugging
        # purposes. Use another tag in this case!
        if: inputs.image_postfix != '-push-ci'
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-pytorch-amd-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-pytorch-amd-gpu-push-ci

  latest-tensorflow:
    name: "Latest TensorFlow [dev]"
    # Push CI doesn't need this image
    if: inputs.image_postfix != '-push-ci'
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Check out code
        uses: actions/checkout@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: ./docker/transformers-tensorflow-gpu
          build-args: |
            REF=main
          push: true
          tags: huggingface/transformers-tensorflow-gpu