🧹 remove generate-related objects and methods scheduled for removal in v4.48 (#35677)

* remove things scheduled for removal

* make fixup
Joao Gante 2025-01-16 17:03:20 +00:00 committed by GitHub
parent aeeceb9916
commit 80dbbd103c
8 changed files with 1 addition and 73 deletions

View File

@@ -1377,7 +1377,6 @@ else:
             "LogitNormalization",
             "LogitsProcessor",
             "LogitsProcessorList",
-            "LogitsWarper",
             "MaxLengthCriteria",
             "MaxTimeCriteria",
             "MinLengthLogitsProcessor",
@@ -6460,7 +6459,6 @@ if TYPE_CHECKING:
             LogitNormalization,
             LogitsProcessor,
             LogitsProcessorList,
-            LogitsWarper,
             MaxLengthCriteria,
             MaxTimeCriteria,
             MinLengthLogitsProcessor,
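
Downstream code that imported the removed symbol from the top-level package now has to target `LogitsProcessor` instead. A minimal sketch of the post-removal behavior, assuming a v4.48 install (the `hasattr` probe is only illustrative):

import transformers

# `LogitsProcessor` is still exported; `LogitsWarper` is gone as of this commit.
assert hasattr(transformers, "LogitsProcessor")
assert not hasattr(transformers, "LogitsWarper")

# i.e. replace `from transformers import LogitsWarper` with:
from transformers import LogitsProcessor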

View File

@@ -63,17 +63,6 @@ class Cache(torch.nn.Module):
         # TODO: deprecate this function in favor of `cache_position`
         raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.")
 
-    # Deprecate in favor of max-cache-shape because we want to be specific by what we mean with "max_length"
-    # Prev some cache objects didn't have "max_length" (SlidingWindowCache or SinkCache) because the cache object technically handles
-    # infinite amount of tokens. In the codebase what we really need to check is the max capacity of certain cache instances, so
-    # we change naming to be more explicit
-    def get_max_length(self) -> Optional[int]:
-        logger.warning_once(
-            "`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. "
-            "Calling `get_max_cache()` will raise error from v4.48"
-        )
-        return self.get_max_cache_shape()
-
     def get_max_cache_shape(self) -> Optional[int]:
         """Returns the maximum sequence length (i.e. max capacity) of the cache object"""
         raise NotImplementedError("Make sure to implement `get_max_cache_shape` in a subclass.")
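
For callers, the change is a method rename with identical semantics. A hedged sketch of the migration (the `DynamicCache` usage is illustrative and not part of this diff):

from transformers import DynamicCache

cache = DynamicCache()

# Before (deprecated, removed here):
# max_len = cache.get_max_length()

# After: same value under the more explicit name; None means the cache has no fixed max capacity.
max_len = cache.get_max_cache_shape()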

View File

@@ -68,7 +68,6 @@ else:
         "LogitNormalization",
         "LogitsProcessor",
         "LogitsProcessorList",
-        "LogitsWarper",
         "MinLengthLogitsProcessor",
         "MinNewTokensLengthLogitsProcessor",
         "MinPLogitsWarper",
@@ -89,7 +88,6 @@ else:
         "WatermarkLogitsProcessor",
     ]
     _import_structure["stopping_criteria"] = [
-        "MaxNewTokensCriteria",
         "MaxLengthCriteria",
         "MaxTimeCriteria",
         "ConfidenceCriteria",
@@ -230,7 +228,6 @@ if TYPE_CHECKING:
             LogitNormalization,
             LogitsProcessor,
             LogitsProcessorList,
-            LogitsWarper,
             MinLengthLogitsProcessor,
             MinNewTokensLengthLogitsProcessor,
             MinPLogitsWarper,
@@ -254,7 +251,6 @@ if TYPE_CHECKING:
             ConfidenceCriteria,
             EosTokenCriteria,
             MaxLengthCriteria,
-            MaxNewTokensCriteria,
             MaxTimeCriteria,
             StoppingCriteria,
             StoppingCriteriaList,
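
`MaxNewTokensCriteria` is dropped from both import paths above; the usual replacement is to let `generate()` build the limit from `max_new_tokens`, or to cap the total length with `MaxLengthCriteria`. A hedged sketch (checkpoint name and token counts are placeholders):

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import MaxLengthCriteria, StoppingCriteriaList

tok = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tok("Hello", return_tensors="pt")

# Preferred: let generate() derive the stopping criterion internally.
out = model.generate(**inputs, max_new_tokens=20)

# Roughly equivalent explicit form: bound prompt length + new tokens.
criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=inputs["input_ids"].shape[1] + 20)])
out = model.generate(**inputs, stopping_criteria=criteria)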

View File

@@ -52,22 +52,6 @@ class LogitsProcessor:
         )
 
 
-class LogitsWarper:
-    """Abstract base class for all logit warpers that can be applied during generation with multinomial sampling."""
-
-    def __init__(self):
-        logger.warning_once(
-            "`LogitsWarper` is deprecated and will be removed in v4.48. Your class should inherit `LogitsProcessor` "
-            "instead, which has the same properties and interface."
-        )
-
-    @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
-        raise NotImplementedError(
-            f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
-        )
-
-
 class LogitsProcessorList(list):
     """
     This class can be used to create a list of [`LogitsProcessor`] to subsequently process a `scores` input tensor.
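
The removed deprecation warning already names the replacement: custom warpers should subclass `LogitsProcessor`, whose `__call__` signature is identical. A minimal migration sketch (the `ExampleTemperatureWarper` class is hypothetical):

import torch
from transformers import LogitsProcessor

class ExampleTemperatureWarper(LogitsProcessor):  # previously inherited from LogitsWarper
    """Hypothetical warper: rescales next-token scores by a fixed temperature."""

    def __init__(self, temperature: float):
        self.temperature = temperature

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        return scores / self.temperature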

View File

@@ -467,28 +467,6 @@ class GPTNeoXAttention(nn.Module):
         return target_dtype
 
 
-# TODO Remove in deprecation cycle
-class GPTNeoXFlashAttention2(GPTNeoXAttention):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        logger.warning_once(
-            "The `GPTNeoXFlashAttention2` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
-# TODO Remove in deprecation cycle
-class GPTNeoXSdpaAttention(GPTNeoXAttention):
-    def __init__(self, config, layer_idx=None):
-        super().__init__(config, layer_idx=layer_idx)
-
-        logger.warning_once(
-            "The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->GPTNeoX
 class GPTNeoXRotaryEmbedding(nn.Module):
     def __init__(self, config: GPTNeoXConfig, device=None):
@@ -600,14 +578,6 @@ class GPTNeoXMLP(nn.Module):
         return hidden_states
 
 
-GPT_NEOX_ATTENTION_CLASSES = {
-    "eager": GPTNeoXAttention,
-    "flash_attention_2": GPTNeoXFlashAttention2,
-    "sdpa": GPTNeoXSdpaAttention,
-    "flex_attention": GPTNeoXAttention,
-}
-
-
 class GPTNeoXLayer(nn.Module):
     def __init__(self, config, layer_idx):
         super().__init__()
@@ -616,7 +586,7 @@ class GPTNeoXLayer(nn.Module):
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.post_attention_dropout = nn.Dropout(config.hidden_dropout)
         self.post_mlp_dropout = nn.Dropout(config.hidden_dropout)
-        self.attention = GPT_NEOX_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
+        self.attention = GPTNeoXAttention(config, layer_idx)
         self.mlp = GPTNeoXMLP(config)
 
     def forward(
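
As the removed warnings state, the attention backend is now chosen through the model config rather than through dedicated classes. A hedged sketch of the replacement pattern (the checkpoint is a placeholder; `flash_attention_2` additionally requires the optional `flash-attn` package):

from transformers import AutoModelForCausalLM

# Before: the backend was picked via GPT_NEOX_ATTENTION_CLASSES at layer construction.
# After: every GPTNeoXAttention reads `config._attn_implementation`, set at load time.
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/pythia-70m",      # placeholder GPT-NeoX checkpoint
    attn_implementation="sdpa",   # or "eager", "flash_attention_2", "flex_attention"
)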

View File

@@ -352,13 +352,6 @@ class LogitsProcessorList(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LogitsWarper(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MaxLengthCriteria(metaclass=DummyObject):
     _backends = ["torch"]

View File

@@ -70,7 +70,6 @@ OBJECTS_TO_IGNORE = [
     # Deprecated
     "InputExample",
     "InputFeatures",
-    "LogitsWarper",
     # Signature is *args/**kwargs
     "TFSequenceSummary",
     "TFBertTokenizer",

View File

@@ -946,7 +946,6 @@ DEPRECATED_OBJECTS = [
     "LineByLineTextDataset",
     "LineByLineWithRefDataset",
     "LineByLineWithSOPTextDataset",
-    "LogitsWarper",
     "NerPipeline",
     "PretrainedBartModel",
     "PretrainedFSMTModel",