Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-16 19:18:24 +06:00)
🧹 remove generate-related objects and methods scheduled for removal in v4.48 (#35677)

* remove things scheduled for removal

* make fixup
parent aeeceb9916
commit 80dbbd103c
@@ -1377,7 +1377,6 @@ else:
             "LogitNormalization",
             "LogitsProcessor",
             "LogitsProcessorList",
-            "LogitsWarper",
             "MaxLengthCriteria",
             "MaxTimeCriteria",
             "MinLengthLogitsProcessor",
@@ -6460,7 +6459,6 @@ if TYPE_CHECKING:
         LogitNormalization,
         LogitsProcessor,
         LogitsProcessorList,
-        LogitsWarper,
         MaxLengthCriteria,
         MaxTimeCriteria,
         MinLengthLogitsProcessor,
@@ -63,17 +63,6 @@ class Cache(torch.nn.Module):
         # TODO: deprecate this function in favor of `cache_position`
         raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.")
 
-    # Deprecate in favor of max-cache-shape because we want to be specifc by what we mean with "max_length"
-    # Prev some cache objects didn't have "max_length" (SlidingWindowCache or SinkCache) because the cache object technically handles
-    # infinite amount of tokens. In the codebase what we really need to check is the max capacity of certain cache instances, so
-    # we change naming to be more explicit
-    def get_max_length(self) -> Optional[int]:
-        logger.warning_once(
-            "`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. "
-            "Calling `get_max_cache()` will raise error from v4.48"
-        )
-        return self.get_max_cache_shape()
-
     def get_max_cache_shape(self) -> Optional[int]:
         """Returns the maximum sequence length (i.e. max capacity) of the cache object"""
         raise NotImplementedError("Make sure to implement `get_max_cache_shape` in a subclass.")
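For downstream code the migration is mechanical: query a cache's capacity through `get_max_cache_shape()` instead of the removed `get_max_length()`. A minimal sketch, with a hypothetical fixed-size subclass and capacity value as assumptions:

```python
from typing import Optional

from transformers import Cache


class FixedWindowCache(Cache):
    """Hypothetical example: a cache with a fixed, bounded capacity."""

    def __init__(self, max_cache_len: int):
        super().__init__()
        self.max_cache_len = max_cache_len

    def get_max_cache_shape(self) -> Optional[int]:
        # Bounded cache: report its fixed capacity.
        return self.max_cache_len


cache = FixedWindowCache(max_cache_len=1024)
# Callers that previously asked `cache.get_max_length()` now ask:
capacity = cache.get_max_cache_shape()  # -> 1024
```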
@@ -68,7 +68,6 @@ else:
         "LogitNormalization",
         "LogitsProcessor",
         "LogitsProcessorList",
-        "LogitsWarper",
         "MinLengthLogitsProcessor",
         "MinNewTokensLengthLogitsProcessor",
         "MinPLogitsWarper",
@@ -89,7 +88,6 @@ else:
         "WatermarkLogitsProcessor",
     ]
     _import_structure["stopping_criteria"] = [
-        "MaxNewTokensCriteria",
         "MaxLengthCriteria",
         "MaxTimeCriteria",
         "ConfidenceCriteria",
@@ -230,7 +228,6 @@ if TYPE_CHECKING:
         LogitNormalization,
         LogitsProcessor,
         LogitsProcessorList,
-        LogitsWarper,
         MinLengthLogitsProcessor,
         MinNewTokensLengthLogitsProcessor,
         MinPLogitsWarper,
@@ -254,7 +251,6 @@ if TYPE_CHECKING:
         ConfidenceCriteria,
         EosTokenCriteria,
         MaxLengthCriteria,
-        MaxNewTokensCriteria,
         MaxTimeCriteria,
         StoppingCriteria,
         StoppingCriteriaList,
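With `MaxNewTokensCriteria` removed, the same bound can be expressed with the surviving `MaxLengthCriteria`, or simply by passing `max_new_tokens` to `generate`. A hedged sketch; the prompt length and token budget are assumed values:

```python
from transformers import MaxLengthCriteria, StoppingCriteriaList

prompt_len = 16      # assumed prompt length
max_new_tokens = 32  # assumed budget of newly generated tokens

# The removed criterion stopped at prompt_len + max_new_tokens;
# MaxLengthCriteria expresses the same bound as an absolute length.
stopping_criteria = StoppingCriteriaList(
    [MaxLengthCriteria(max_length=prompt_len + max_new_tokens)]
)

# Alternatively, `model.generate(..., max_new_tokens=32)` applies the same limit internally.
```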
@@ -52,22 +52,6 @@ class LogitsProcessor:
         )
 
 
-class LogitsWarper:
-    """Abstract base class for all logit warpers that can be applied during generation with multinomial sampling."""
-
-    def __init__(self):
-        logger.warning_once(
-            "`LogitsWarper` is deprecated and will be removed in v4.48. Your class should inherit `LogitsProcessor` "
-            "instead, which has the same properties and interface."
-        )
-
-    @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
-        raise NotImplementedError(
-            f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
-        )
-
-
 class LogitsProcessorList(list):
     """
     This class can be used to create a list of [`LogitsProcessor`] to subsequently process a `scores` input tensor.
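Custom warpers that inherited the removed `LogitsWarper` should now inherit `LogitsProcessor`, which, per the deprecation notice above, has the same properties and `__call__` interface. A minimal sketch with a hypothetical temperature-scaling processor:

```python
import torch
from transformers import LogitsProcessor


class TemperatureScaler(LogitsProcessor):
    """Hypothetical example: a sampling-time warper, now written as a LogitsProcessor."""

    def __init__(self, temperature: float):
        self.temperature = temperature

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        # Same __call__ contract the removed LogitsWarper defined.
        return scores / self.temperature
```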
@@ -467,28 +467,6 @@ class GPTNeoXAttention(nn.Module):
         return target_dtype
 
 
-# TODO Remove in deprecation cycle
-class GPTNeoXFlashAttention2(GPTNeoXAttention):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        logger.warning_once(
-            "The `GPTNeoXFlashAttention2` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
-# TODO Remove in deprecation cycle
-class GPTNeoXSdpaAttention(GPTNeoXAttention):
-    def __init__(self, config, layer_idx=None):
-        super().__init__(config, layer_idx=layer_idx)
-
-        logger.warning_once(
-            "The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->GPTNeoX
 class GPTNeoXRotaryEmbedding(nn.Module):
     def __init__(self, config: GPTNeoXConfig, device=None):
@@ -600,14 +578,6 @@ class GPTNeoXMLP(nn.Module):
         return hidden_states
 
 
-GPT_NEOX_ATTENTION_CLASSES = {
-    "eager": GPTNeoXAttention,
-    "flash_attention_2": GPTNeoXFlashAttention2,
-    "sdpa": GPTNeoXSdpaAttention,
-    "flex_attention": GPTNeoXAttention,
-}
-
-
 class GPTNeoXLayer(nn.Module):
     def __init__(self, config, layer_idx):
         super().__init__()
@@ -616,7 +586,7 @@ class GPTNeoXLayer(nn.Module):
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.post_attention_dropout = nn.Dropout(config.hidden_dropout)
         self.post_mlp_dropout = nn.Dropout(config.hidden_dropout)
-        self.attention = GPT_NEOX_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
+        self.attention = GPTNeoXAttention(config, layer_idx)
         self.mlp = GPTNeoXMLP(config)
 
     def forward(
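As the removed deprecation warnings state, the attention backend for GPT-NeoX is now selected via the attention implementation setting rather than by dedicated subclasses. A sketch of the replacement path; the checkpoint name and the availability of the chosen kernels are assumptions:

```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neox-20b",
    attn_implementation="sdpa",  # or "eager", "flash_attention_2", "flex_attention"
)
# Each GPTNeoXLayer now builds a plain GPTNeoXAttention; the selected backend is
# read from `config._attn_implementation` instead of picking a dedicated subclass.
```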
@@ -352,13 +352,6 @@ class LogitsProcessorList(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LogitsWarper(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MaxLengthCriteria(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -70,7 +70,6 @@ OBJECTS_TO_IGNORE = [
     # Deprecated
     "InputExample",
     "InputFeatures",
-    "LogitsWarper",
     # Signature is *args/**kwargs
     "TFSequenceSummary",
    "TFBertTokenizer",
@@ -946,7 +946,6 @@ DEPRECATED_OBJECTS = [
     "LineByLineTextDataset",
     "LineByLineWithRefDataset",
     "LineByLineWithSOPTextDataset",
-    "LogitsWarper",
     "NerPipeline",
     "PretrainedBartModel",
     "PretrainedFSMTModel",