Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-16 19:18:24 +06:00)
🧹 remove generate-related objects and methods scheduled for removal in v4.48 (#35677)

* remove things scheduled for removal

* make fixup
parent aeeceb9916
commit 80dbbd103c
@@ -1377,7 +1377,6 @@ else:
             "LogitNormalization",
             "LogitsProcessor",
             "LogitsProcessorList",
-            "LogitsWarper",
             "MaxLengthCriteria",
             "MaxTimeCriteria",
             "MinLengthLogitsProcessor",
@@ -6460,7 +6459,6 @@ if TYPE_CHECKING:
         LogitNormalization,
         LogitsProcessor,
         LogitsProcessorList,
-        LogitsWarper,
         MaxLengthCriteria,
         MaxTimeCriteria,
         MinLengthLogitsProcessor,
@@ -63,17 +63,6 @@ class Cache(torch.nn.Module):
         # TODO: deprecate this function in favor of `cache_position`
         raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.")
 
-    # Deprecate in favor of max-cache-shape because we want to be specifc by what we mean with "max_length"
-    # Prev some cache objects didn't have "max_length" (SlidingWindowCache or SinkCache) because the cache object technically handles
-    # infinite amount of tokens. In the codebase what we really need to check is the max capacity of certain cache instances, so
-    # we change naming to be more explicit
-    def get_max_length(self) -> Optional[int]:
-        logger.warning_once(
-            "`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. "
-            "Calling `get_max_cache()` will raise error from v4.48"
-        )
-        return self.get_max_cache_shape()
-
     def get_max_cache_shape(self) -> Optional[int]:
         """Returns the maximum sequence length (i.e. max capacity) of the cache object"""
         raise NotImplementedError("Make sure to implement `get_max_cache_shape` in a subclass.")
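For downstream code the migration is mechanical: query a cache's capacity through `get_max_cache_shape()` instead of the removed `get_max_length()`. A minimal sketch, with a hypothetical fixed-size subclass and capacity value as assumptions:

```python
from typing import Optional

from transformers import Cache


class FixedWindowCache(Cache):
    """Hypothetical example: a cache with a fixed, bounded capacity."""

    def __init__(self, max_cache_len: int):
        super().__init__()
        self.max_cache_len = max_cache_len

    def get_max_cache_shape(self) -> Optional[int]:
        # Bounded cache: report its fixed capacity.
        return self.max_cache_len


cache = FixedWindowCache(max_cache_len=1024)
# Callers that previously asked `cache.get_max_length()` now ask:
capacity = cache.get_max_cache_shape()  # -> 1024
```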
@@ -68,7 +68,6 @@ else:
         "LogitNormalization",
         "LogitsProcessor",
         "LogitsProcessorList",
-        "LogitsWarper",
         "MinLengthLogitsProcessor",
         "MinNewTokensLengthLogitsProcessor",
         "MinPLogitsWarper",
@@ -89,7 +88,6 @@ else:
         "WatermarkLogitsProcessor",
     ]
     _import_structure["stopping_criteria"] = [
-        "MaxNewTokensCriteria",
         "MaxLengthCriteria",
         "MaxTimeCriteria",
         "ConfidenceCriteria",
@@ -230,7 +228,6 @@ if TYPE_CHECKING:
         LogitNormalization,
         LogitsProcessor,
         LogitsProcessorList,
-        LogitsWarper,
         MinLengthLogitsProcessor,
         MinNewTokensLengthLogitsProcessor,
         MinPLogitsWarper,
@@ -254,7 +251,6 @@ if TYPE_CHECKING:
         ConfidenceCriteria,
         EosTokenCriteria,
         MaxLengthCriteria,
-        MaxNewTokensCriteria,
         MaxTimeCriteria,
         StoppingCriteria,
         StoppingCriteriaList,
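With `MaxNewTokensCriteria` removed, the same bound can be expressed with the surviving `MaxLengthCriteria`, or simply by passing `max_new_tokens` to `generate`. A hedged sketch; the prompt length and token budget are assumed values:

```python
from transformers import MaxLengthCriteria, StoppingCriteriaList

prompt_len = 16      # assumed prompt length
max_new_tokens = 32  # assumed budget of newly generated tokens

# The removed criterion stopped at prompt_len + max_new_tokens;
# MaxLengthCriteria expresses the same bound as an absolute length.
stopping_criteria = StoppingCriteriaList(
    [MaxLengthCriteria(max_length=prompt_len + max_new_tokens)]
)

# Alternatively, `model.generate(..., max_new_tokens=32)` applies the same limit internally.
```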
@@ -52,22 +52,6 @@ class LogitsProcessor:
         )
 
 
-class LogitsWarper:
-    """Abstract base class for all logit warpers that can be applied during generation with multinomial sampling."""
-
-    def __init__(self):
-        logger.warning_once(
-            "`LogitsWarper` is deprecated and will be removed in v4.48. Your class should inherit `LogitsProcessor` "
-            "instead, which has the same properties and interface."
-        )
-
-    @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
-        raise NotImplementedError(
-            f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
-        )
-
-
 class LogitsProcessorList(list):
     """
     This class can be used to create a list of [`LogitsProcessor`] to subsequently process a `scores` input tensor.
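Custom warpers that inherited the removed `LogitsWarper` should now inherit `LogitsProcessor`, which, per the deprecation notice above, has the same properties and `__call__` interface. A minimal sketch with a hypothetical temperature-scaling processor:

```python
import torch
from transformers import LogitsProcessor


class TemperatureScaler(LogitsProcessor):
    """Hypothetical example: a sampling-time warper, now written as a LogitsProcessor."""

    def __init__(self, temperature: float):
        self.temperature = temperature

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        # Same __call__ contract the removed LogitsWarper defined.
        return scores / self.temperature
```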
@@ -467,28 +467,6 @@ class GPTNeoXAttention(nn.Module):
         return target_dtype
 
 
-# TODO Remove in deprecation cycle
-class GPTNeoXFlashAttention2(GPTNeoXAttention):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        logger.warning_once(
-            "The `GPTNeoXFlashAttention2` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
-# TODO Remove in deprecation cycle
-class GPTNeoXSdpaAttention(GPTNeoXAttention):
-    def __init__(self, config, layer_idx=None):
-        super().__init__(config, layer_idx=layer_idx)
-
-        logger.warning_once(
-            "The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->GPTNeoX
 class GPTNeoXRotaryEmbedding(nn.Module):
     def __init__(self, config: GPTNeoXConfig, device=None):
@@ -600,14 +578,6 @@ class GPTNeoXMLP(nn.Module):
         return hidden_states
 
 
-GPT_NEOX_ATTENTION_CLASSES = {
-    "eager": GPTNeoXAttention,
-    "flash_attention_2": GPTNeoXFlashAttention2,
-    "sdpa": GPTNeoXSdpaAttention,
-    "flex_attention": GPTNeoXAttention,
-}
-
-
 class GPTNeoXLayer(nn.Module):
     def __init__(self, config, layer_idx):
         super().__init__()
@@ -616,7 +586,7 @@ class GPTNeoXLayer(nn.Module):
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.post_attention_dropout = nn.Dropout(config.hidden_dropout)
         self.post_mlp_dropout = nn.Dropout(config.hidden_dropout)
-        self.attention = GPT_NEOX_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
+        self.attention = GPTNeoXAttention(config, layer_idx)
         self.mlp = GPTNeoXMLP(config)
 
     def forward(
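As the removed deprecation warnings state, the attention backend for GPT-NeoX is now selected via the attention implementation setting rather than by dedicated subclasses. A sketch of the replacement path; the checkpoint name and the availability of the chosen kernels are assumptions:

```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neox-20b",
    attn_implementation="sdpa",  # or "eager", "flash_attention_2", "flex_attention"
)
# Each GPTNeoXLayer now builds a plain GPTNeoXAttention; the selected backend is
# read from `config._attn_implementation` instead of picking a dedicated subclass.
```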
@@ -352,13 +352,6 @@ class LogitsProcessorList(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LogitsWarper(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MaxLengthCriteria(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -70,7 +70,6 @@ OBJECTS_TO_IGNORE = [
     # Deprecated
     "InputExample",
     "InputFeatures",
-    "LogitsWarper",
     # Signature is *args/**kwargs
     "TFSequenceSummary",
    "TFBertTokenizer",
@@ -946,7 +946,6 @@ DEPRECATED_OBJECTS = [
     "LineByLineTextDataset",
     "LineByLineWithRefDataset",
     "LineByLineWithSOPTextDataset",
-    "LogitsWarper",
     "NerPipeline",
     "PretrainedBartModel",
     "PretrainedFSMTModel",