🧹 remove generate-related objects and methods scheduled for removal in v4.48 (#35677)

* remove things scheduled for removal

* make fixup
Joao Gante 2025-01-16 17:03:20 +00:00 committed by GitHub
parent aeeceb9916
commit 80dbbd103c
8 changed files with 1 addition and 73 deletions

View File

@@ -1377,7 +1377,6 @@ else:
             "LogitNormalization",
             "LogitsProcessor",
             "LogitsProcessorList",
-            "LogitsWarper",
             "MaxLengthCriteria",
             "MaxTimeCriteria",
             "MinLengthLogitsProcessor",
@@ -6460,7 +6459,6 @@ if TYPE_CHECKING:
             LogitNormalization,
             LogitsProcessor,
             LogitsProcessorList,
-            LogitsWarper,
             MaxLengthCriteria,
             MaxTimeCriteria,
             MinLengthLogitsProcessor,
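
Downstream code that imported the removed symbol from the top-level package now has to target `LogitsProcessor` instead. A minimal sketch of the post-removal behavior, assuming a v4.48 install (the `hasattr` probe is only illustrative):

import transformers

# `LogitsProcessor` is still exported; `LogitsWarper` is gone as of this commit.
assert hasattr(transformers, "LogitsProcessor")
assert not hasattr(transformers, "LogitsWarper")

# i.e. replace `from transformers import LogitsWarper` with:
from transformers import LogitsProcessor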

View File

@@ -63,17 +63,6 @@ class Cache(torch.nn.Module):
         # TODO: deprecate this function in favor of `cache_position`
         raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.")
 
-    # Deprecate in favor of max-cache-shape because we want to be specific by what we mean with "max_length"
-    # Prev some cache objects didn't have "max_length" (SlidingWindowCache or SinkCache) because the cache object technically handles
-    # infinite amount of tokens. In the codebase what we really need to check is the max capacity of certain cache instances, so
-    # we change naming to be more explicit
-    def get_max_length(self) -> Optional[int]:
-        logger.warning_once(
-            "`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. "
-            "Calling `get_max_cache()` will raise error from v4.48"
-        )
-        return self.get_max_cache_shape()
-
     def get_max_cache_shape(self) -> Optional[int]:
         """Returns the maximum sequence length (i.e. max capacity) of the cache object"""
         raise NotImplementedError("Make sure to implement `get_max_cache_shape` in a subclass.")
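
For callers, the change is a method rename with identical semantics. A hedged sketch of the migration (the `DynamicCache` usage is illustrative and not part of this diff):

from transformers import DynamicCache

cache = DynamicCache()

# Before (deprecated, removed here):
# max_len = cache.get_max_length()

# After: same value under the more explicit name; None means the cache has no fixed max capacity.
max_len = cache.get_max_cache_shape()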

View File

@@ -68,7 +68,6 @@ else:
         "LogitNormalization",
         "LogitsProcessor",
         "LogitsProcessorList",
-        "LogitsWarper",
         "MinLengthLogitsProcessor",
         "MinNewTokensLengthLogitsProcessor",
         "MinPLogitsWarper",
@@ -89,7 +88,6 @@ else:
         "WatermarkLogitsProcessor",
     ]
     _import_structure["stopping_criteria"] = [
-        "MaxNewTokensCriteria",
         "MaxLengthCriteria",
         "MaxTimeCriteria",
         "ConfidenceCriteria",
@@ -230,7 +228,6 @@ if TYPE_CHECKING:
             LogitNormalization,
             LogitsProcessor,
             LogitsProcessorList,
-            LogitsWarper,
             MinLengthLogitsProcessor,
             MinNewTokensLengthLogitsProcessor,
             MinPLogitsWarper,
@@ -254,7 +251,6 @@ if TYPE_CHECKING:
             ConfidenceCriteria,
             EosTokenCriteria,
             MaxLengthCriteria,
-            MaxNewTokensCriteria,
             MaxTimeCriteria,
             StoppingCriteria,
             StoppingCriteriaList,
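
`MaxNewTokensCriteria` is dropped from both import paths above; the usual replacement is to let `generate()` build the limit from `max_new_tokens`, or to cap the total length with `MaxLengthCriteria`. A hedged sketch (checkpoint name and token counts are placeholders):

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import MaxLengthCriteria, StoppingCriteriaList

tok = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tok("Hello", return_tensors="pt")

# Preferred: let generate() derive the stopping criterion internally.
out = model.generate(**inputs, max_new_tokens=20)

# Roughly equivalent explicit form: bound prompt length + new tokens.
criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=inputs["input_ids"].shape[1] + 20)])
out = model.generate(**inputs, stopping_criteria=criteria)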

View File

@@ -52,22 +52,6 @@ class LogitsProcessor:
         )
 
 
-class LogitsWarper:
-    """Abstract base class for all logit warpers that can be applied during generation with multinomial sampling."""
-
-    def __init__(self):
-        logger.warning_once(
-            "`LogitsWarper` is deprecated and will be removed in v4.48. Your class should inherit `LogitsProcessor` "
-            "instead, which has the same properties and interface."
-        )
-
-    @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
-        raise NotImplementedError(
-            f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
-        )
-
-
 class LogitsProcessorList(list):
     """
     This class can be used to create a list of [`LogitsProcessor`] to subsequently process a `scores` input tensor.
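
The removed deprecation warning already names the replacement: custom warpers should subclass `LogitsProcessor`, whose `__call__` signature is identical. A minimal migration sketch (the `ExampleTemperatureWarper` class is hypothetical):

import torch
from transformers import LogitsProcessor

class ExampleTemperatureWarper(LogitsProcessor):  # previously inherited from LogitsWarper
    """Hypothetical warper: rescales next-token scores by a fixed temperature."""

    def __init__(self, temperature: float):
        self.temperature = temperature

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        return scores / self.temperature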

View File

@@ -467,28 +467,6 @@ class GPTNeoXAttention(nn.Module):
         return target_dtype
 
 
-# TODO Remove in deprecation cycle
-class GPTNeoXFlashAttention2(GPTNeoXAttention):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        logger.warning_once(
-            "The `GPTNeoXFlashAttention2` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
-# TODO Remove in deprecation cycle
-class GPTNeoXSdpaAttention(GPTNeoXAttention):
-    def __init__(self, config, layer_idx=None):
-        super().__init__(config, layer_idx=layer_idx)
-
-        logger.warning_once(
-            "The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->GPTNeoX
 class GPTNeoXRotaryEmbedding(nn.Module):
     def __init__(self, config: GPTNeoXConfig, device=None):
@@ -600,14 +578,6 @@ class GPTNeoXMLP(nn.Module):
         return hidden_states
 
 
-GPT_NEOX_ATTENTION_CLASSES = {
-    "eager": GPTNeoXAttention,
-    "flash_attention_2": GPTNeoXFlashAttention2,
-    "sdpa": GPTNeoXSdpaAttention,
-    "flex_attention": GPTNeoXAttention,
-}
-
-
 class GPTNeoXLayer(nn.Module):
     def __init__(self, config, layer_idx):
         super().__init__()
@@ -616,7 +586,7 @@ class GPTNeoXLayer(nn.Module):
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.post_attention_dropout = nn.Dropout(config.hidden_dropout)
         self.post_mlp_dropout = nn.Dropout(config.hidden_dropout)
-        self.attention = GPT_NEOX_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
+        self.attention = GPTNeoXAttention(config, layer_idx)
         self.mlp = GPTNeoXMLP(config)
 
     def forward(
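
As the removed warnings state, the attention backend is now chosen through the model config rather than through dedicated classes. A hedged sketch of the replacement pattern (the checkpoint is a placeholder; `flash_attention_2` additionally requires the optional `flash-attn` package):

from transformers import AutoModelForCausalLM

# Before: the backend was picked via GPT_NEOX_ATTENTION_CLASSES at layer construction.
# After: every GPTNeoXAttention reads `config._attn_implementation`, set at load time.
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/pythia-70m",      # placeholder GPT-NeoX checkpoint
    attn_implementation="sdpa",   # or "eager", "flash_attention_2", "flex_attention"
)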

View File

@@ -352,13 +352,6 @@ class LogitsProcessorList(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LogitsWarper(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MaxLengthCriteria(metaclass=DummyObject):
     _backends = ["torch"]

View File

@@ -70,7 +70,6 @@ OBJECTS_TO_IGNORE = [
     # Deprecated
     "InputExample",
     "InputFeatures",
-    "LogitsWarper",
     # Signature is *args/**kwargs
     "TFSequenceSummary",
     "TFBertTokenizer",

View File

@@ -946,7 +946,6 @@ DEPRECATED_OBJECTS = [
     "LineByLineTextDataset",
     "LineByLineWithRefDataset",
     "LineByLineWithSOPTextDataset",
-    "LogitsWarper",
     "NerPipeline",
     "PretrainedBartModel",
     "PretrainedFSMTModel",