From cb17103bd5e31373e090f2f37602dcc992c017e4 Mon Sep 17 00:00:00 2001
From: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Date: Fri, 27 Jun 2025 13:51:46 +0200
Subject: [PATCH] Uninstalling Flash attention from quantization docker
 (#39078)

* update

* revert
---
 docker/transformers-quantization-latest-gpu/Dockerfile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index c860dabd6ac..ad9cf891e25 100755
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -93,6 +93,9 @@ RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
 # `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
 RUN python3 -m pip uninstall -y kernels
 
+# Uninstall flash-attn installed by autoawq, it causes issues here: https://github.com/huggingface/transformers/actions/runs/15915442841/job/44892146131
+RUN python3 -m pip uninstall -y flash-attn
+
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
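
After building with this change, a quick sanity check (a sketch only, not part of the patch; the image tag below is an assumption, substitute whatever tag the CI build actually produces) is to confirm the flash_attn module is no longer importable inside the image:

    # Exits non-zero if flash_attn is still installed in the image
    docker run --rm huggingface/transformers-quantization-latest-gpu \
        python3 -c "import importlib.util, sys; sys.exit(1 if importlib.util.find_spec('flash_attn') else 0)"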