Fix DeepSpeed stuff in the nightly CI (#23478)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar 2023-05-19 20:31:55 +02:00 committed by GitHub
parent 3cb9309024
commit 1f2c00d671
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 1 deletions

View File

@ -52,6 +52,16 @@ jobs:
name: "Nightly PyTorch + DeepSpeed"
runs-on: ubuntu-latest
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

View File

@ -1,4 +1,4 @@
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-12.html#rel-22-12
FROM nvcr.io/nvidia/pytorch:22.12-py3
LABEL maintainer="Hugging Face"
@ -25,6 +25,9 @@ RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
# Uninstall `transformer-engine` shipped with the base image
RUN python3 -m pip uninstall -y transformer-engine
# Uninstall `torch-tensorrt` and `apex` shipped with the base image
RUN python3 -m pip uninstall -y torch-tensorrt apex