Uninstalling Flash Attention from the quantization Docker image (#39078)

* update * revert
2025-07-03 12:50:06 +06:00 · 2025-06-27 13:51:46 +02:00 · 2025-06-27 13:51:46 +02:00 · cb17103bd5
commit cb17103bd5
parent 371c471113
1 changed files with 3 additions and 0 deletions
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -93,6 +93,9 @@ RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
 # `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
 RUN python3 -m pip uninstall -y kernels

+# Uninstall flash-attn, which is installed by autoawq; it causes issues here: https://github.com/huggingface/transformers/actions/runs/15915442841/job/44892146131
+RUN python3 -m pip uninstall -y flash-attn
+
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop