[docs] Cache implementations (#34325)

cache
2025-07-31 02:02:21 +06:00 · 2024-10-25 08:52:45 -07:00 · 2024-10-25 08:52:45 -07:00 · 1d06379331
commit 1d06379331
parent 6a62a6d1b5
1 changed files with 9 additions and 1 deletions
--- a/src/transformers/generation/configuration_utils.py
+++ b/src/transformers/generation/configuration_utils.py
@@ -172,7 +172,15 @@ class GenerationConfig(PushToHubMixin):
            speed up decoding.
        cache_implementation (`str`, *optional*, default to `None`):
            Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are:
-            {ALL_CACHE_IMPLEMENTATIONS}. We support other cache types, but they must be manually instantiated and
+
+            - `"static"`: [`StaticCache`]
+            - `"offloaded_static"`: [`OffloadedStaticCache`]
+            - `"sliding_window"`: [`SlidingWindowCache`]
+            - `"hybrid"`: [`HybridCache`]
+            - `"mamba"`: [`MambaCache`]
+            - `"quantized"`: [`QuantizedCache`]
+
+            We support other cache types, but they must be manually instantiated and
            passed to `generate` through the `past_key_values` argument. See our
            [cache documentation](https://huggingface.co/docs/transformers/en/kv_cache) for further information.
        cache_config (`CacheConfig` or `dict`, *optional*, default to `None`):