mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00
update deepspeed docker (#37371)
* update * create docker image * 03 * uninstall pytest as it conflits with transformers * wrong one * better * see which package depends on pytest * up * resintall * fix * deepspeedddddddd * deepspeedddddddd * deepspeedddddddd * deepspeedddddddd * deepspeedddddddd * deepspeedddddddd * deepspeedddddddd * deepspeedddddddd --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
e3eda6d188
commit
7ae0be722e
2
.github/workflows/build-docker-images.yml
vendored
2
.github/workflows/build-docker-images.yml
vendored
@ -70,7 +70,7 @@ jobs:
|
||||
latest-torch-deepspeed-docker:
|
||||
name: "Latest PyTorch + DeepSpeed"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
|
@ -42,7 +42,7 @@ jobs:
|
||||
nightly-torch-deepspeed-docker:
|
||||
name: "Nightly PyTorch + DeepSpeed"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
|
@ -1,12 +1,12 @@
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
|
||||
FROM nvcr.io/nvidia/pytorch:23.11-py3
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
|
||||
FROM nvcr.io/nvidia/pytorch:24.08-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ARG PYTORCH='2.2.0'
|
||||
ARG PYTORCH='2.6.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu121'
|
||||
ARG CUDA='cu126'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
@ -15,7 +15,8 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
|
||||
RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
|
||||
|
||||
# Install latest release PyTorch
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
|
@ -1,11 +1,11 @@
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
|
||||
FROM nvcr.io/nvidia/pytorch:23.11-py3
|
||||
FROM nvcr.io/nvidia/pytorch:24.08-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu121'
|
||||
ARG CUDA='cu126'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
@ -21,7 +21,8 @@ RUN python3 -m pip uninstall -y torch torchvision torchaudio
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
|
||||
RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user