[HybridCache] disable automatic compilation (#36620)

This commit is contained in:
Joao Gante 2025-03-10 09:24:26 +00:00 committed by GitHub
parent 94ae1ba5b5
commit 858545047c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 1 deletion

View File

@ -1602,7 +1602,9 @@ class HybridCache(Cache):
```
"""
is_compileable = True
# TODO (joao): dive deeper into gemma2 and paligemma -- there are reports of speed loss with compilation. Revert
# ALL changes from the PR that commented out the line below when reactivating it.
# is_compileable = True
# TODO (joao): remove `=None` in non-optional arguments in v4.46. Remove from `OBJECTS_TO_IGNORE` as well.
@deprecate_kwarg("layer_device_map", version="4.52.0")

View File

@ -2118,6 +2118,9 @@ class GenerationTesterMixin:
Tests that `.generate` is compatible with torch.compile without graph breaks, keeping the same results.
Runs two sequential generations to ensure the cache doesn't get stuck after the first compiled run! ⚠️
"""
# Monkey-patching the HybridCache at test-time to continue testing compilation support
HybridCache.is_compileable = True
for model_class in self.all_generative_model_classes:
if not model_class._supports_static_cache:
self.skipTest("This model doesn't support static cache (= no expectations of compilation support)")
@ -2214,6 +2217,9 @@ class GenerationTesterMixin:
Tests that all optional outputs are behaving as expected when compilation is triggered.
In essence, it's the same as `test_greedy_generate_dict_outputs`, but with automatic compilation triggered.
"""
# Monkey-patching the HybridCache at test-time to continue testing compilation support
HybridCache.is_compileable = True
for model_class in self.all_generative_model_classes:
if not model_class._supports_static_cache:
self.skipTest("This model doesn't support static cache (= no expectations of compilation support)")