Use newer typing notation

Signed-off-by: cyy <cyyever@outlook.com>
cyy authored 2025-06-20 17:15:06 +08:00
parent 31d30b7224
commit 1d5fd195df
70 changed files with 2176 additions and 2263 deletions
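
Every hunk below makes the same mechanical change: `typing.Optional[X]` becomes `X | None`, `typing.Union[A, B]` becomes `A | B` (PEP 604), and `typing` imports that are no longer referenced are dropped. A minimal before/after sketch of the notation, using a hypothetical function rather than code taken from the diff:

    from __future__ import annotations  # PEP 563: annotations stay unevaluated

    from typing import Optional, Union


    # Old spelling, as removed throughout this commit:
    def wait_old(timeout: Optional[float] = None, blocks: Union[int, list[int]] = 1) -> Optional[str]:
        ...


    # New spelling (PEP 604 unions), as introduced throughout this commit:
    def wait_new(timeout: float | None = None, blocks: int | list[int] = 1) -> str | None:
        ...

On interpreters older than Python 3.10 the `X | Y` form is only legal inside annotations when `from __future__ import annotations` is in effect, as it is in several of the touched files; otherwise it requires Python 3.10+. The runtime behavior of the annotated code is unchanged either way.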

src/transformers/generation/streamers.py

@@ -17,7 +17,7 @@ from __future__ import annotations
 import asyncio
 from queue import Queue
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 if TYPE_CHECKING:
@@ -206,7 +206,7 @@ class TextIteratorStreamer(TextStreamer):
     """
     def __init__(
-        self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
+        self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: float | None = None, **decode_kwargs
     ):
         super().__init__(tokenizer, skip_prompt, **decode_kwargs)
         self.text_queue = Queue()
@@ -284,7 +284,7 @@ class AsyncTextIteratorStreamer(TextStreamer):
     """
     def __init__(
-        self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
+        self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: float | None = None, **decode_kwargs
     ):
         super().__init__(tokenizer, skip_prompt, **decode_kwargs)
         self.text_queue = asyncio.Queue()

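The streamer hunks only touch the `timeout` annotation on the two constructors; call sites are unaffected. A hypothetical usage sketch (the checkpoint name is a placeholder, not taken from this commit):

    from transformers import AutoTokenizer, TextIteratorStreamer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
    # `timeout` accepts a float or None exactly as before; only the
    # annotation's spelling changed from Optional[float] to float | None.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=30.0)
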
src/transformers/integrations/tensor_parallel.py

@@ -17,7 +17,6 @@ import operator
 import os
 import re
 from functools import partial, reduce
-from typing import Optional, Union
 import torch
 import torch.distributed as dist
@@ -93,7 +92,7 @@ def initialize_tensor_parallelism(tp_plan, tp_size=None):
     return tp_device, device_map, device_mesh
-def _blocks_to_block_sizes(total_size: int, blocks: Union[int, list[int]]) -> list[int]:
+def _blocks_to_block_sizes(total_size: int, blocks: int | list[int]) -> list[int]:
     """
     Convert block count or proportions to block sizes.
@@ -119,7 +118,7 @@ def _blocks_to_block_sizes(total_size: int, blocks: Union[int, list[int]]) -> list[int]:
     return [single_size] * blocks
-def _get_parameter_tp_plan(parameter_name: str, tp_plan: dict[str, str]) -> Optional[str]:
+def _get_parameter_tp_plan(parameter_name: str, tp_plan: dict[str, str]) -> str | None:
     """
     Get the TP style for a parameter from the TP plan.
@@ -366,8 +365,8 @@ class GatherParallel(TensorParallelLayer):
     def __init__(
         self,
         *,
-        input_layouts: Optional[Placement] = None,
-        output_layouts: Optional[Placement] = None,
+        input_layouts: Placement | None = None,
+        output_layouts: Placement | None = None,
         use_local_output: bool = True,
     ):
         super().__init__()
@@ -460,8 +459,8 @@ class ColwiseParallel(TensorParallelLayer):
     def __init__(
         self,
         *,
-        input_layouts: Optional[Placement] = None,
-        output_layouts: Optional[Placement] = None,
+        input_layouts: Placement | None = None,
+        output_layouts: Placement | None = None,
         use_local_output: bool = True,
         use_dtensor=True,
     ):
@@ -548,8 +547,8 @@ class RowwiseParallel(TensorParallelLayer):
     def __init__(
         self,
         *,
-        input_layouts: Optional[Placement] = None,
-        output_layouts: Optional[Placement] = None,
+        input_layouts: Placement | None = None,
+        output_layouts: Placement | None = None,
         use_local_output: bool = True,
         use_dtensor=True,
     ):
@@ -887,7 +886,7 @@ def shard_and_distribute_module(
     return param
-def verify_tp_plan(expected_keys: list[str], tp_plan: Optional[dict[str, str]]):
+def verify_tp_plan(expected_keys: list[str], tp_plan: dict[str, str] | None):
     """
     Verify the TP plan of the model, log a warning if the layers that were not sharded and the rules that were not applied.
     """

src/transformers/modeling_tf_outputs.py

@@ -16,7 +16,6 @@ from __future__ import annotations
 import warnings
 from dataclasses import dataclass
-from typing import Optional
 import tensorflow as tf
@@ -44,7 +43,7 @@ class TFBaseModelOutput(ModelOutput):
             heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -64,8 +63,8 @@ class TFBaseModelOutputWithNoAttention(ModelOutput):
             Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
-    hidden_states: Optional[tuple[tf.Tensor, ...]] = None
+    last_hidden_state: tf.Tensor | None = None
+    hidden_states: tuple[tf.Tensor, ...] | None = None
 @dataclass
@@ -96,8 +95,8 @@ class TFBaseModelOutputWithPooling(ModelOutput):
             heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
-    pooler_output: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
+    pooler_output: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -119,9 +118,9 @@ class TFBaseModelOutputWithPoolingAndNoAttention(ModelOutput):
             Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
-    pooler_output: Optional[tf.Tensor] = None
-    hidden_states: Optional[tuple[tf.Tensor, ...]] = None
+    last_hidden_state: tf.Tensor | None = None
+    pooler_output: tf.Tensor | None = None
+    hidden_states: tuple[tf.Tensor, ...] | None = None
 @dataclass
@@ -164,8 +163,8 @@ class TFBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
             weighted average in the cross-attention heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
-    pooler_output: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
+    pooler_output: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -202,7 +201,7 @@ class TFBaseModelOutputWithPast(ModelOutput):
             heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -235,7 +234,7 @@ class TFBaseModelOutputWithCrossAttentions(ModelOutput):
             weighted average in the cross-attention heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
     cross_attentions: tuple[tf.Tensor] | None = None
@@ -277,7 +276,7 @@ class TFBaseModelOutputWithPastAndCrossAttentions(ModelOutput):
             weighted average in the cross-attention heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -334,7 +333,7 @@ class TFSeq2SeqModelOutput(ModelOutput):
             self-attention heads.
     """
-    last_hidden_state: Optional[tf.Tensor] = None
+    last_hidden_state: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     decoder_hidden_states: tuple[tf.Tensor] | None = None
     decoder_attentions: tuple[tf.Tensor] | None = None
@@ -368,7 +367,7 @@ class TFCausalLMOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -403,7 +402,7 @@ class TFCausalLMOutputWithPast(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -445,7 +444,7 @@ class TFCausalLMOutputWithCrossAttentions(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -476,7 +475,7 @@ class TFMaskedLMOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -530,7 +529,7 @@ class TFSeq2SeqLMOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     decoder_hidden_states: tuple[tf.Tensor] | None = None
     decoder_attentions: tuple[tf.Tensor] | None = None
@@ -565,7 +564,7 @@ class TFNextSentencePredictorOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -594,7 +593,7 @@ class TFSequenceClassifierOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -645,7 +644,7 @@ class TFSeq2SeqSequenceClassifierOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     decoder_hidden_states: tuple[tf.Tensor] | None = None
     decoder_attentions: tuple[tf.Tensor] | None = None
@@ -687,7 +686,7 @@ class TFSemanticSegmenterOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -719,7 +718,7 @@ class TFSemanticSegmenterOutputWithNoAttention(ModelOutput):
     """
    loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
@@ -745,7 +744,7 @@ class TFImageClassifierOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -776,7 +775,7 @@ class TFMultipleChoiceModelOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -805,7 +804,7 @@ class TFTokenClassifierOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -836,8 +835,8 @@ class TFQuestionAnsweringModelOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    start_logits: Optional[tf.Tensor] = None
-    end_logits: Optional[tf.Tensor] = None
+    start_logits: tf.Tensor | None = None
+    end_logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -887,8 +886,8 @@ class TFSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    start_logits: Optional[tf.Tensor] = None
-    end_logits: Optional[tf.Tensor] = None
+    start_logits: tf.Tensor | None = None
+    end_logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     decoder_hidden_states: tuple[tf.Tensor] | None = None
     decoder_attentions: tuple[tf.Tensor] | None = None
@@ -927,7 +926,7 @@ class TFSequenceClassifierOutputWithPast(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -950,8 +949,8 @@ class TFImageClassifierOutputWithNoAttention(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    logits: Optional[tf.Tensor] = None
-    hidden_states: Optional[tuple[tf.Tensor, ...]] = None
+    logits: tf.Tensor | None = None
+    hidden_states: tuple[tf.Tensor, ...] | None = None
 @dataclass
@@ -977,7 +976,7 @@ class TFMaskedImageModelingOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    reconstruction: Optional[tf.Tensor] = None
+    reconstruction: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None

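All of the edits in this file follow one pattern: dataclass fields annotated `Optional[tf.Tensor]` become `tf.Tensor | None`. A minimal sketch of the resulting shape, using a hypothetical class rather than one of the outputs above; because the file keeps `from __future__ import annotations`, `@dataclass` only ever sees the annotations as strings, so the union spelling has no runtime cost:

    from __future__ import annotations  # as already present in this file

    from dataclasses import dataclass

    import tensorflow as tf


    @dataclass
    class ExampleOutput:  # hypothetical stand-in for a ModelOutput subclass
        loss: tf.Tensor | None = None
        logits: tf.Tensor | None = None
        hidden_states: tuple[tf.Tensor, ...] | None = None
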
src/transformers/modeling_tf_utils.py

@@ -27,7 +27,7 @@ import re
 import warnings
 from collections.abc import Mapping
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Union
 import h5py
 import numpy as np
@@ -1412,10 +1412,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         dataset: datasets.Dataset,  # noqa:F821
         batch_size: int = 8,
         shuffle: bool = True,
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
-        collate_fn: Optional[Callable] = None,
-        collate_fn_args: Optional[dict[str, Any]] = None,
-        drop_remainder: Optional[bool] = None,
+        tokenizer: PreTrainedTokenizerBase | None = None,
+        collate_fn: Callable | None = None,
+        collate_fn_args: dict[str, Any] | None = None,
+        drop_remainder: bool | None = None,
         prefetch: bool = True,
     ):
         """
@@ -1811,14 +1811,14 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         self,
         output_dir,
         model_name: str,
-        language: Optional[str] = None,
-        license: Optional[str] = None,
-        tags: Optional[str] = None,
-        finetuned_from: Optional[str] = None,
-        tasks: Optional[str] = None,
-        dataset_tags: Optional[Union[str, list[str]]] = None,
-        dataset: Optional[Union[str, list[str]]] = None,
-        dataset_args: Optional[Union[str, list[str]]] = None,
+        language: str | None = None,
+        license: str | None = None,
+        tags: str | None = None,
+        finetuned_from: str | None = None,
+        tasks: str | None = None,
+        dataset_tags: str | list[str] | None = None,
+        dataset: str | list[str] | None = None,
+        dataset_args: str | list[str] | None = None,
     ):
         """
         Creates a draft of a model card using the information available to the `Trainer`.
@@ -1887,7 +1887,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
             self.build_in_name_scope()
         main_layer.set_input_embeddings(value)
-    def get_output_embeddings(self) -> Union[None, keras.layers.Layer]:
+    def get_output_embeddings(self) -> None | keras.layers.Layer:
         """
         Returns the model's output embeddings
@@ -1924,7 +1924,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
             self.build_in_name_scope()
         lm_head.set_output_embeddings(value)
-    def get_output_layer_with_bias(self) -> Union[None, keras.layers.Layer]:
+    def get_output_layer_with_bias(self) -> None | keras.layers.Layer:
         """
         Get the layer that handles a bias attribute in case the model has an LM head with weights tied to the
         embeddings
@@ -1937,7 +1937,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         )
         return self.get_lm_head()
-    def get_prefix_bias_name(self) -> Union[None, str]:
+    def get_prefix_bias_name(self) -> None | str:
         """
         Get the concatenated _prefix name of the bias from the model name to the parent layer
@@ -1947,7 +1947,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
         return None
-    def get_bias(self) -> Union[None, dict[str, tf.Variable]]:
+    def get_bias(self) -> None | dict[str, tf.Variable]:
         """
         Dict of bias attached to an LM head. The key represents the name of the bias attribute.
@@ -1989,9 +1989,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         """
         return None
-    def resize_token_embeddings(
-        self, new_num_tokens: Optional[int] = None
-    ) -> Union[keras.layers.Embedding, tf.Variable]:
+    def resize_token_embeddings(self, new_num_tokens: int | None = None) -> keras.layers.Embedding | tf.Variable:
         """
         Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`.
@@ -2022,7 +2020,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         return model_embeds
-    def _v2_resized_token_embeddings(self, new_num_tokens: Optional[int] = None) -> keras.layers.Embedding:
+    def _v2_resized_token_embeddings(self, new_num_tokens: int | None = None) -> keras.layers.Embedding:
         """
         Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`.
@@ -2346,10 +2344,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
         version=1,
         push_to_hub=False,
         signatures=None,
-        max_shard_size: Union[int, str] = "5GB",
+        max_shard_size: int | str = "5GB",
         create_pr: bool = False,
         safe_serialization: bool = False,
-        token: Optional[Union[str, bool]] = None,
+        token: str | bool | None = None,
         **kwargs,
     ):
         """
@@ -2525,16 +2523,16 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
     @classmethod
     def from_pretrained(
         cls,
-        pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
+        pretrained_model_name_or_path: str | os.PathLike | None,
         *model_args,
-        config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None,
-        cache_dir: Optional[Union[str, os.PathLike]] = None,
+        config: PretrainedConfig | str | os.PathLike | None = None,
+        cache_dir: str | os.PathLike | None = None,
         ignore_mismatched_sizes: bool = False,
         force_download: bool = False,
         local_files_only: bool = False,
-        token: Optional[Union[str, bool]] = None,
+        token: str | bool | None = None,
         revision: str = "main",
-        use_safetensors: Optional[bool] = None,
+        use_safetensors: bool | None = None,
         **kwargs,
     ):
         r"""
@@ -3121,13 +3119,13 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
     def push_to_hub(
         self,
         repo_id: str,
-        use_temp_dir: Optional[bool] = None,
-        commit_message: Optional[str] = None,
-        private: Optional[bool] = None,
-        max_shard_size: Optional[Union[int, str]] = "10GB",
-        token: Optional[Union[bool, str]] = None,
+        use_temp_dir: bool | None = None,
+        commit_message: str | None = None,
+        private: bool | None = None,
+        max_shard_size: int | str | None = "10GB",
+        token: bool | str | None = None,
         # (`use_auth_token` is deprecated: we have to keep it here as we don't have **kwargs)
-        use_auth_token: Optional[Union[bool, str]] = None,
+        use_auth_token: bool | str | None = None,
         create_pr: bool = False,
         **base_model_card_args,
     ) -> str:
@@ -3314,7 +3312,7 @@ class TFSharedEmbeddings(keras.layers.Layer):
     # TODO (joao): flagged for detection due to embeddings refactor
-    def __init__(self, vocab_size: int, hidden_size: int, initializer_range: Optional[float] = None, **kwargs):
+    def __init__(self, vocab_size: int, hidden_size: int, initializer_range: float | None = None, **kwargs):
         super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size

src/transformers/models/albert/modeling_tf_albert.py

@@ -19,7 +19,6 @@ from __future__ import annotations
 import math
 from dataclasses import dataclass
-from typing import Optional, Union
 import numpy as np
 import tensorflow as tf
@@ -164,10 +163,10 @@ class TFAlbertEmbeddings(keras.layers.Layer):
     # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        token_type_ids: Optional[tf.Tensor] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        token_type_ids: tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
         past_key_values_length=0,
         training: bool = False,
     ) -> tf.Tensor:
@@ -403,7 +402,7 @@ class TFAlbertLayerGroup(keras.layers.Layer):
         output_attentions: bool,
         output_hidden_states: bool,
         training: bool = False,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         layer_hidden_states = () if output_hidden_states else None
         layer_attentions = () if output_attentions else None
@@ -466,7 +465,7 @@ class TFAlbertTransformer(keras.layers.Layer):
         output_hidden_states: bool,
         return_dict: bool,
         training: bool = False,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         hidden_states = self.embedding_hidden_mapping_in(inputs=hidden_states)
         all_attentions = () if output_attentions else None
         all_hidden_states = (hidden_states,) if output_hidden_states else None
@@ -629,11 +628,11 @@ class TFAlbertMainLayer(keras.layers.Layer):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
@@ -749,9 +748,9 @@ class TFAlbertForPreTrainingOutput(ModelOutput):
             heads.
     """
-    loss: Optional[tf.Tensor] = None
-    prediction_logits: Optional[tf.Tensor] = None
-    sop_logits: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
+    prediction_logits: tf.Tensor | None = None
+    sop_logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -879,11 +878,11 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         outputs = self.albert(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -942,13 +941,13 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         sentence_order_label: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFAlbertForPreTrainingOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFAlbertForPreTrainingOutput | tuple[tf.Tensor]:
         r"""
         Return:
@@ -1070,12 +1069,12 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1193,12 +1192,12 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClassificationLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1290,12 +1289,12 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificationLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1382,13 +1381,13 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         start_positions: np.ndarray | tf.Tensor | None = None,
         end_positions: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
         r"""
         start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
@@ -1485,12 +1484,12 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`

src/transformers/models/bart/modeling_tf_bart.py

@@ -17,7 +17,6 @@
 from __future__ import annotations
 import random
-from typing import Optional, Union
 import numpy as np
 import tensorflow as tf
@@ -103,7 +102,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: int
     return tf.tile(mask[None, None, :, :], (bsz, 1, 1, 1))
-def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
+def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
     """
     Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
     """
@@ -129,7 +128,7 @@ class TFBartLearnedPositionalEmbedding(keras.layers.Embedding):
     def call(
         self,
-        input_shape: Optional[tf.TensorShape] = None,
+        input_shape: tf.TensorShape | None = None,
         past_key_values_length: int = 0,
         position_ids: tf.Tensor | None = None,
     ):
@@ -184,7 +183,7 @@ class TFBartAttention(keras.layers.Layer):
         past_key_value: tuple[tuple[tf.Tensor]] | None = None,
         attention_mask: tf.Tensor | None = None,
         layer_head_mask: tf.Tensor | None = None,
-        training: Optional[bool] = False,
+        training: bool | None = False,
     ) -> tuple[tf.Tensor, tf.Tensor | None]:
         """Input shape: Batch x Time x Channel"""
@@ -334,7 +333,7 @@ class TFBartEncoderLayer(keras.layers.Layer):
         hidden_states: tf.Tensor,
         attention_mask: np.ndarray | tf.Tensor | None,
         layer_head_mask: tf.Tensor | None,
-        training: Optional[bool] = False,
+        training: bool | None = False,
     ) -> tf.Tensor:
         """
         Args:
@@ -427,8 +426,8 @@ class TFBartDecoderLayer(keras.layers.Layer):
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         layer_head_mask: tf.Tensor | None = None,
         cross_attn_layer_head_mask: tf.Tensor | None = None,
-        past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        training: Optional[bool] = False,
+        past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+        training: bool | None = False,
     ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
         """
         Args:
@@ -750,7 +749,7 @@ class TFBartEncoder(keras.layers.Layer):
         config: BartConfig
     """
-    def __init__(self, config: BartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
+    def __init__(self, config: BartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
         super().__init__(**kwargs)
         self.config = config
         self.dropout = keras.layers.Dropout(config.dropout)
@@ -776,11 +775,11 @@ class TFBartEncoder(keras.layers.Layer):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         """
         Args:
             input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@@ -910,7 +909,7 @@ class TFBartDecoder(keras.layers.Layer):
         embed_tokens: output embedding
     """
-    def __init__(self, config: BartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
+    def __init__(self, config: BartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
         super().__init__(**kwargs)
         self.config = config
         self.padding_idx = config.pad_token_id
@@ -938,13 +937,13 @@
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
         r"""
         Args:
             input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@@ -1167,17 +1166,17 @@ class TFBartMainLayer(keras.layers.Layer):
         head_mask: np.ndarray | tf.Tensor | None = None,
         decoder_head_mask: np.ndarray | tf.Tensor | None = None,
         cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
-        encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        encoder_outputs: tuple | TFBaseModelOutput | None = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
         **kwargs,
-    ) -> Union[TFSeq2SeqModelOutput, tuple[tf.Tensor]]:
+    ) -> TFSeq2SeqModelOutput | tuple[tf.Tensor]:
         # different to other models, Bart automatically creates decoder_input_ids from
         # input_ids if no decoder_input_ids are provided
         if decoder_input_ids is None and decoder_inputs_embeds is None:
@@ -1297,17 +1296,17 @@ class TFBartModel(TFBartPretrainedModel):
         head_mask: np.ndarray | tf.Tensor | None = None,
         decoder_head_mask: np.ndarray | tf.Tensor | None = None,
         cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
-        encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        encoder_outputs: tuple | TFBaseModelOutput | None = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
         **kwargs,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         outputs = self.model(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -1429,17 +1428,17 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageModelingLoss):
         head_mask: np.ndarray | tf.Tensor | None = None,
         decoder_head_mask: np.ndarray | tf.Tensor | None = None,
         cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
-        encoder_outputs: Optional[TFBaseModelOutput] = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        encoder_outputs: TFBaseModelOutput | None = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
@@ -1600,17 +1599,17 @@ class TFBartForSequenceClassification(TFBartPretrainedModel, TFSequenceClassificationLoss):
         head_mask: np.ndarray | tf.Tensor | None = None,
         decoder_head_mask: np.ndarray | tf.Tensor | None = None,
         cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
-        encoder_outputs: Optional[TFBaseModelOutput] = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        encoder_outputs: TFBaseModelOutput | None = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFSeq2SeqSequenceClassifierOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFSeq2SeqSequenceClassifierOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,

src/transformers/models/bert/modeling_tf_bert.py

@@ -20,7 +20,6 @@ from __future__ import annotations
 import math
 import warnings
 from dataclasses import dataclass
-from typing import Optional, Union
 import numpy as np
 import tensorflow as tf
@@ -161,10 +160,10 @@ class TFBertEmbeddings(keras.layers.Layer):
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        token_type_ids: Optional[tf.Tensor] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        token_type_ids: tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
         past_key_values_length=0,
         training: bool = False,
     ) -> tf.Tensor:
@@ -589,12 +588,12 @@ class TFBertEncoder(keras.layers.Layer):
         encoder_hidden_states: tf.Tensor | None,
         encoder_attention_mask: tf.Tensor | None,
         past_key_values: tuple[tuple[tf.Tensor]] | None,
-        use_cache: Optional[bool],
+        use_cache: bool | None,
         output_attentions: bool,
         output_hidden_states: bool,
         return_dict: bool,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@@ -851,13 +850,13 @@ class TFBertMainLayer(keras.layers.Layer):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
         if not self.config.is_decoder:
             use_cache = False
@@ -1048,10 +1047,10 @@ class TFBertForPreTrainingOutput(ModelOutput):
     """
     loss: tf.Tensor | None = None
-    prediction_logits: Optional[tf.Tensor] = None
-    seq_relationship_logits: Optional[tf.Tensor] = None
-    hidden_states: Optional[Union[tuple[tf.Tensor], tf.Tensor]] = None
-    attentions: Optional[Union[tuple[tf.Tensor], tf.Tensor]] = None
+    prediction_logits: tf.Tensor | None = None
+    seq_relationship_logits: tf.Tensor | None = None
+    hidden_states: tuple[tf.Tensor] | tf.Tensor | None = None
+    attentions: tuple[tf.Tensor] | tf.Tensor | None = None
 BERT_START_DOCSTRING = r"""
@@ -1179,13 +1178,13 @@ class TFBertModel(TFBertPreTrainedModel):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
         r"""
         encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1273,13 +1272,13 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         next_sentence_label: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBertForPreTrainingOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFBertForPreTrainingOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1405,12 +1404,12 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1509,15 +1508,15 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
+        training: bool | None = False,
         **kwargs,
-    ) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
+    ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
         r"""
         encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1617,12 +1616,12 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredictionLoss):
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         next_sentence_label: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFNextSentencePredictorOutput, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFNextSentencePredictorOutput | tuple[tf.Tensor]:
         r"""
Return: Return:
@ -1731,12 +1730,12 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1820,12 +1819,12 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1946,12 +1945,12 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -2045,13 +2044,13 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
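Every file touched above keeps `from __future__ import annotations` at the top, which is what makes this rewrite safe on interpreters older than Python 3.10: under PEP 563 annotations are stored as strings and never evaluated at import time, so the PEP 604 `X | None` spelling costs nothing at runtime. A minimal sketch of the equivalence (illustrative names, run on Python 3.10+):

    from __future__ import annotations

    import typing

    def old_style(timeout: typing.Optional[float] = None) -> typing.Union[str, int]:
        return "ok"

    def new_style(timeout: float | None = None) -> str | int:
        return "ok"

    # PEP 604 unions compare equal to their typing.Union equivalents, so both
    # spellings resolve to the same hints once evaluated.
    print(typing.get_type_hints(old_style) == typing.get_type_hints(new_style))  # True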
@@ -19,7 +19,6 @@ from __future__ import annotations
 import os
 import random
 import warnings
-from typing import Optional, Union
 import tensorflow as tf
@@ -104,7 +103,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
 # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
-def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
+def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
 """
 Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
 """
@@ -179,7 +178,7 @@ class TFBlenderbotAttention(keras.layers.Layer):
 past_key_value: tuple[tuple[tf.Tensor]] | None = None,
 attention_mask: tf.Tensor | None = None,
 layer_head_mask: tf.Tensor | None = None,
-training: Optional[bool] = False,
+training: bool | None = False,
 ) -> tuple[tf.Tensor, tf.Tensor | None]:
 """Input shape: Batch x Time x Channel"""
@@ -330,7 +329,7 @@ class TFBlenderbotEncoderLayer(keras.layers.Layer):
 hidden_states: tf.Tensor,
 attention_mask: tf.Tensor,
 layer_head_mask: tf.Tensor,
-training: Optional[bool] = False,
+training: bool | None = False,
 ):
 """
 Args:
@@ -425,7 +424,7 @@ class TFBlenderbotDecoderLayer(keras.layers.Layer):
 layer_head_mask: tf.Tensor | None = None,
 cross_attn_layer_head_mask: tf.Tensor | None = None,
 past_key_value: tuple[tf.Tensor] | None = None,
-training: Optional[bool] = False,
+training: bool | None = False,
 ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
 """
 Args:
@@ -687,7 +686,7 @@ class TFBlenderbotEncoder(keras.layers.Layer):
 config: BlenderbotConfig
 """
-def __init__(self, config: BlenderbotConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
+def __init__(self, config: BlenderbotConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
 super().__init__(**kwargs)
 self.config = config
 self.dropout = keras.layers.Dropout(config.dropout)
@@ -859,7 +858,7 @@ class TFBlenderbotDecoder(keras.layers.Layer):
 embed_tokens: output embedding
 """
-def __init__(self, config: BlenderbotConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
+def __init__(self, config: BlenderbotConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
 super().__init__(**kwargs)
 self.config = config
 self.padding_idx = config.pad_token_id
@@ -1128,7 +1127,7 @@ class TFBlenderbotMainLayer(keras.layers.Layer):
 head_mask=None,
 decoder_head_mask=None,
 cross_attn_head_mask=None,
-encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
+encoder_outputs: tuple | TFBaseModelOutput | None = None,
 past_key_values=None,
 inputs_embeds=None,
 decoder_inputs_embeds=None,
@@ -1230,7 +1229,7 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel):
 return self.model.decoder
 @classmethod
-def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
+def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, *model_args, **kwargs):
 if pretrained_model_name_or_path == "facebook/blenderbot-90M":
 from ..blenderbot_small import TFBlenderbotSmallModel
@@ -1262,17 +1261,17 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel):
 head_mask: tf.Tensor | None = None,
 decoder_head_mask: tf.Tensor | None = None,
 cross_attn_head_mask: tf.Tensor | None = None,
-encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
+encoder_outputs: tuple | TFBaseModelOutput | None = None,
 past_key_values: list[tf.Tensor] | None = None,
 inputs_embeds: tf.Tensor | None = None,
 decoder_inputs_embeds: tf.Tensor | None = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = False,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = False,
 **kwargs,
-) -> Union[tuple[tf.Tensor], TFSeq2SeqModelOutput]:
+) -> tuple[tf.Tensor] | TFSeq2SeqModelOutput:
 outputs = self.model(
 input_ids=input_ids,
 attention_mask=attention_mask,
@@ -1385,7 +1384,7 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal
 self.bias_layer.bias.assign(value["final_logits_bias"])
 @classmethod
-def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
+def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, *model_args, **kwargs):
 if pretrained_model_name_or_path == "facebook/blenderbot-90M":
 from ..blenderbot_small import TFBlenderbotSmallForConditionalGeneration
@@ -1414,17 +1413,17 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal
 head_mask: tf.Tensor | None = None,
 decoder_head_mask: tf.Tensor | None = None,
 cross_attn_head_mask: tf.Tensor | None = None,
-encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
+encoder_outputs: tuple | TFBaseModelOutput | None = None,
 past_key_values: list[tf.Tensor] | None = None,
 inputs_embeds: tf.Tensor | None = None,
 decoder_inputs_embeds: tf.Tensor | None = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[tuple[tf.Tensor], TFSeq2SeqLMOutput]:
+training: bool | None = False,
+) -> tuple[tf.Tensor] | TFSeq2SeqLMOutput:
 r"""
 labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
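Note that the diff can drop `from typing import Optional, Union` entirely only because every remaining use in this file sits inside an annotation. The future import defers those, but a runtime use of `|` between types is evaluated eagerly and still requires Python 3.10+. A hedged sketch of the distinction (illustrative names, not code from this file):

    from __future__ import annotations

    import sys

    # Fine on Python 3.7+: the annotation is kept as the string "int | None".
    def expand(tgt_len: int | None = None) -> list[int]:
        return list(range(tgt_len or 0))

    # A runtime union expression would raise TypeError before 3.10, so it is
    # version-gated here.
    if sys.version_info >= (3, 10):
        MaskLength = int | None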
@@ -17,7 +17,6 @@
 from __future__ import annotations
 import random
-from typing import Optional, Union
 import numpy as np
 import tensorflow as tf
@@ -103,7 +102,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
 # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
-def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
+def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
 """
 Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
 """
@@ -179,7 +178,7 @@ class TFBlenderbotSmallAttention(keras.layers.Layer):
 past_key_value: tuple[tuple[tf.Tensor]] | None = None,
 attention_mask: tf.Tensor | None = None,
 layer_head_mask: tf.Tensor | None = None,
-training: Optional[bool] = False,
+training: bool | None = False,
 ) -> tuple[tf.Tensor, tf.Tensor | None]:
 """Input shape: Batch x Time x Channel"""
@@ -330,7 +329,7 @@ class TFBlenderbotSmallEncoderLayer(keras.layers.Layer):
 hidden_states: tf.Tensor,
 attention_mask: np.ndarray | tf.Tensor | None,
 layer_head_mask: tf.Tensor | None,
-training: Optional[bool] = False,
+training: bool | None = False,
 ) -> tf.Tensor:
 """
 Args:
@@ -424,8 +423,8 @@ class TFBlenderbotSmallDecoderLayer(keras.layers.Layer):
 encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
 layer_head_mask: tf.Tensor | None = None,
 cross_attn_layer_head_mask: tf.Tensor | None = None,
-past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-training: Optional[bool] = False,
+past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+training: bool | None = False,
 ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
 """
 Args:
@@ -691,7 +690,7 @@ class TFBlenderbotSmallEncoder(keras.layers.Layer):
 config: BlenderbotSmallConfig
 """
-def __init__(self, config: BlenderbotSmallConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
+def __init__(self, config: BlenderbotSmallConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
 super().__init__(**kwargs)
 self.config = config
 self.dropout = keras.layers.Dropout(config.dropout)
@@ -863,7 +862,7 @@ class TFBlenderbotSmallDecoder(keras.layers.Layer):
 embed_tokens: output embedding
 """
-def __init__(self, config: BlenderbotSmallConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
+def __init__(self, config: BlenderbotSmallConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
 super().__init__(**kwargs)
 self.config = config
 self.padding_idx = config.pad_token_id
@@ -1129,7 +1128,7 @@ class TFBlenderbotSmallMainLayer(keras.layers.Layer):
 head_mask=None,
 decoder_head_mask=None,
 cross_attn_head_mask=None,
-encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
+encoder_outputs: tuple | TFBaseModelOutput | None = None,
 past_key_values=None,
 inputs_embeds=None,
 decoder_inputs_embeds=None,
@@ -1247,17 +1246,17 @@ class TFBlenderbotSmallModel(TFBlenderbotSmallPreTrainedModel):
 head_mask: tf.Tensor | None = None,
 decoder_head_mask: tf.Tensor | None = None,
 cross_attn_head_mask: tf.Tensor | None = None,
-encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
+encoder_outputs: tuple | TFBaseModelOutput | None = None,
 past_key_values: list[tf.Tensor] | None = None,
 inputs_embeds: tf.Tensor | None = None,
 decoder_inputs_embeds: tf.Tensor | None = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = False,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = False,
 **kwargs,
-) -> Union[tuple[tf.Tensor], TFSeq2SeqModelOutput]:
+) -> tuple[tf.Tensor] | TFSeq2SeqModelOutput:
 outputs = self.model(
 input_ids=input_ids,
 attention_mask=attention_mask,
@@ -1383,17 +1382,17 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel
 head_mask: tf.Tensor | None = None,
 decoder_head_mask: tf.Tensor | None = None,
 cross_attn_head_mask: tf.Tensor | None = None,
-encoder_outputs: Optional[TFBaseModelOutput] = None,
+encoder_outputs: TFBaseModelOutput | None = None,
 past_key_values: list[tf.Tensor] | None = None,
 inputs_embeds: tf.Tensor | None = None,
 decoder_inputs_embeds: tf.Tensor | None = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[tuple[tf.Tensor], TFSeq2SeqLMOutput]:
+training: bool | None = False,
+) -> tuple[tf.Tensor] | TFSeq2SeqLMOutput:
 r"""
 labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
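The deeply nested hints above, such as `Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]]`, are rewritten inside-out: the inner `Union[...]` becomes an inline `|`, then the outer `Optional[...]` becomes a trailing `| None`. A sketch of the equivalence with stand-in types (`bytes`/`str`) so it runs without NumPy or TensorFlow; requires Python 3.10+:

    from typing import Optional, Union

    old = Optional[tuple[tuple[Union[bytes, str]]]]
    new = tuple[tuple[bytes | str]] | None

    # PEP 604 unions compare equal to the typing spelling, so the rewrite is a
    # pure change of notation.
    print(old == new)  # True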
@@ -18,7 +18,7 @@ from __future__ import annotations
 import warnings
 from dataclasses import dataclass
-from typing import Any, Optional, Union
+from typing import Any
 import tensorflow as tf
@@ -96,7 +96,7 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput):
 loss: tuple[tf.Tensor] | None = None
 logits: tuple[tf.Tensor] | None = None
 image_embeds: tf.Tensor | None = None
-last_hidden_state: Optional[tf.Tensor] = None
+last_hidden_state: tf.Tensor | None = None
 hidden_states: tuple[tf.Tensor, ...] | None = None
 attentions: tuple[tf.Tensor, ...] | None = None
@@ -138,7 +138,7 @@ class TFBlipTextVisionModelOutput(ModelOutput):
 loss: tf.Tensor | None = None
 image_embeds: tf.Tensor | None = None
-last_hidden_state: Optional[tf.Tensor] = None
+last_hidden_state: tf.Tensor | None = None
 hidden_states: tuple[tf.Tensor, ...] | None = None
 attentions: tuple[tf.Tensor, ...] | None = None
@@ -179,7 +179,7 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput):
 itm_score: tf.Tensor | None = None
 loss: tf.Tensor | None = None
 image_embeds: tf.Tensor | None = None
-last_hidden_state: Optional[tf.Tensor] = None
+last_hidden_state: tf.Tensor | None = None
 hidden_states: tuple[tf.Tensor, ...] | None = None
 vision_pooler_output: tf.Tensor | None = None
 attentions: tuple[tf.Tensor, ...] | None = None
@@ -209,10 +209,10 @@ class TFBlipOutput(ModelOutput):
 """
 loss: tf.Tensor | None = None
-logits_per_image: Optional[tf.Tensor] = None
-logits_per_text: Optional[tf.Tensor] = None
-text_embeds: Optional[tf.Tensor] = None
-image_embeds: Optional[tf.Tensor] = None
+logits_per_image: tf.Tensor | None = None
+logits_per_text: tf.Tensor | None = None
+text_embeds: tf.Tensor | None = None
+image_embeds: tf.Tensor | None = None
 text_model_output: TFBaseModelOutputWithPooling = None
 vision_model_output: TFBaseModelOutputWithPooling = None
@@ -309,9 +309,9 @@ class TFBlipTextEmbeddings(keras.layers.Layer):
 def call(
 self,
-input_ids: Optional[tf.Tensor] = None,
-position_ids: Optional[tf.Tensor] = None,
-inputs_embeds: Optional[tf.Tensor] = None,
+input_ids: tf.Tensor | None = None,
+position_ids: tf.Tensor | None = None,
+inputs_embeds: tf.Tensor | None = None,
 ) -> tf.Tensor:
 """
 Applies embedding based on inputs tensor.
@@ -367,8 +367,8 @@ class TFBlipAttention(keras.layers.Layer):
 self,
 hidden_states: tf.Tensor,
 head_mask: tf.Tensor | None = None,
-output_attentions: Optional[bool] = False,
-training: Optional[bool] = None,
+output_attentions: bool | None = False,
+training: bool | None = None,
 ) -> tuple[tf.Tensor, tf.Tensor | None, tuple[tf.Tensor] | None]:
 """Input shape: Batch x Time x Channel"""
@@ -470,8 +470,8 @@ class TFBlipEncoderLayer(keras.layers.Layer):
 self,
 hidden_states: tf.Tensor,
 attention_mask: tf.Tensor,
-output_attentions: Optional[bool] = False,
-training: Optional[bool] = None,
+output_attentions: bool | None = False,
+training: bool | None = None,
 ) -> tuple[tf.Tensor]:
 """
 Args:
@@ -624,11 +624,11 @@ class TFBlipEncoder(keras.layers.Layer):
 self,
 inputs_embeds,
 attention_mask: tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBaseModelOutput]:
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBaseModelOutput:
 r"""
 Args:
 inputs_embeds (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
@@ -723,11 +723,11 @@ class TFBlipVisionModel(TFBlipPreTrainedModel):
 def call(
 self,
 pixel_values: tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBaseModelOutputWithPooling]:
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBaseModelOutputWithPooling:
 r"""
 Returns:
@@ -861,12 +861,12 @@ class TFBlipMainLayer(keras.layers.Layer):
 pixel_values: tf.Tensor | None = None,
 attention_mask: tf.Tensor | None = None,
 position_ids: tf.Tensor | None = None,
-return_loss: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBlipOutput]:
+return_loss: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBlipOutput:
 # Use BLIP model's config for some fields (if specified) instead of those of vision & text components.
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
 output_hidden_states = (
@@ -954,12 +954,12 @@ class TFBlipModel(TFBlipPreTrainedModel):
 pixel_values: tf.Tensor | None = None,
 attention_mask: tf.Tensor | None = None,
 position_ids: tf.Tensor | None = None,
-return_loss: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBlipOutput]:
+return_loss: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBlipOutput:
 r"""
 Returns:
@@ -1003,7 +1003,7 @@ class TFBlipModel(TFBlipPreTrainedModel):
 input_ids: tf.Tensor | None = None,
 attention_mask: tf.Tensor | None = None,
 position_ids: tf.Tensor | None = None,
-return_dict: Optional[bool] = None,
+return_dict: bool | None = None,
 ) -> tf.Tensor:
 r"""
 Returns:
@@ -1039,7 +1039,7 @@ class TFBlipModel(TFBlipPreTrainedModel):
 def get_image_features(
 self,
 pixel_values: tf.Tensor | None = None,
-return_dict: Optional[bool] = None,
+return_dict: bool | None = None,
 ) -> tf.Tensor:
 r"""
 Returns:
@@ -1116,12 +1116,12 @@ class TFBlipForConditionalGeneration(TFBlipPreTrainedModel):
 pixel_values: tf.Tensor,
 input_ids: tf.Tensor | None = None,
 attention_mask: tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
 labels: tf.Tensor | None = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBlipForConditionalGenerationModelOutput]:
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBlipForConditionalGenerationModelOutput:
 r"""
 Returns:
@@ -1333,12 +1333,12 @@ class TFBlipForQuestionAnswering(TFBlipPreTrainedModel):
 decoder_input_ids: tf.Tensor | None = None,
 decoder_attention_mask: tf.Tensor | None = None,
 attention_mask: tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
 labels: tf.Tensor | None = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBlipTextVisionModelOutput]:
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBlipTextVisionModelOutput:
 r"""
 Returns:
@@ -1586,13 +1586,13 @@ class TFBlipForImageTextRetrieval(TFBlipPreTrainedModel):
 self,
 input_ids: tf.Tensor,
 pixel_values: tf.Tensor | None = None,
-use_itm_head: Optional[bool] = True,
+use_itm_head: bool | None = True,
 attention_mask: tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = None,
-) -> Union[tuple, TFBlipImageTextMatchingModelOutput]:
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = None,
+) -> tuple | TFBlipImageTextMatchingModelOutput:
 r"""
 Returns:
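The `ModelOutput` subclasses above are dataclasses, and `@dataclass` records field annotations without evaluating them, so under the future import `tf.Tensor | None` stays an uninspected string; the explicit `= None` defaults still have to be written out. A rough sketch with `float` standing in for `tf.Tensor`:

    from __future__ import annotations

    from dataclasses import dataclass, fields

    @dataclass
    class DemoOutput:
        loss: float | None = None
        logits_per_image: float | None = None
        logits_per_text: float | None = None

    # Field types are stored as plain strings such as 'float | None'.
    print([(f.name, f.type) for f in fields(DemoOutput)])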
@@ -17,7 +17,6 @@
 from __future__ import annotations
 import math
-from typing import Optional
 import tensorflow as tf
@@ -303,7 +302,7 @@ class TFBlipTextSelfOutput(keras.layers.Layer):
 self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
 self.config = config
-def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: Optional[bool] = None) -> tf.Tensor:
+def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: bool | None = None) -> tf.Tensor:
 hidden_states = self.dense(inputs=hidden_states)
 hidden_states = self.dropout(inputs=hidden_states, training=training)
 hidden_states = self.LayerNorm(inputs=hidden_states + input_tensor)
@@ -338,8 +337,8 @@ class TFBlipTextAttention(keras.layers.Layer):
 encoder_hidden_states: tf.Tensor | None = None,
 encoder_attention_mask: tf.Tensor | None = None,
 past_key_value: tuple[tuple[tf.Tensor]] | None = None,
-output_attentions: Optional[bool] = False,
-training: Optional[bool] = None,
+output_attentions: bool | None = False,
+training: bool | None = None,
 ):
 self_outputs = self.self(
 hidden_states,
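The recurring `training: bool | None` parameter in these Keras layers is a tri-state convention: `True` and `False` force a mode, while `None` defers to the surrounding context. A simplified sketch of the idea (not the library's actual resolution logic):

    from __future__ import annotations

    def dropout_like(x: list[float], training: bool | None = None) -> list[float]:
        if training is None:
            training = False  # stand-in for Keras resolving the ambient phase
        return [0.0] * len(x) if training else x

    print(dropout_like([1.0, 2.0]))                 # [1.0, 2.0]
    print(dropout_like([1.0, 2.0], training=True))  # [0.0, 0.0]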
@@ -19,7 +19,6 @@ from __future__ import annotations
 import math
 import warnings
-from typing import Optional, Union
 import numpy as np
 import tensorflow as tf
@@ -695,12 +694,12 @@ class TFCamembertEncoder(keras.layers.Layer):
 encoder_hidden_states: tf.Tensor | None,
 encoder_attention_mask: tf.Tensor | None,
 past_key_values: tuple[tuple[tf.Tensor]] | None,
-use_cache: Optional[bool],
+use_cache: bool | None,
 output_attentions: bool,
 output_hidden_states: bool,
 return_dict: bool,
 training: bool = False,
-) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
+) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
 all_hidden_states = () if output_hidden_states else None
 all_attentions = () if output_attentions else None
 all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@@ -809,13 +808,13 @@ class TFCamembertMainLayer(keras.layers.Layer):
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
 encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
 encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 training: bool = False,
-) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
+) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
 if not self.config.is_decoder:
 use_cache = False
@@ -1008,13 +1007,13 @@ class TFCamembertModel(TFCamembertPreTrainedModel):
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
 encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
 encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
-training: Optional[bool] = False,
-) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]:
+past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
+training: bool | None = False,
+) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
 r"""
 encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
 Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1164,12 +1163,12 @@ class TFCamembertForMaskedLM(TFCamembertPreTrainedModel, TFMaskedLanguageModelin
 position_ids: np.ndarray | tf.Tensor | None = None,
 head_mask: np.ndarray | tf.Tensor | None = None,
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: np.ndarray | tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
+training: bool | None = False,
+) -> TFMaskedLMOutput | tuple[tf.Tensor]:
 r"""
 labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1294,12 +1293,12 @@ class TFCamembertForSequenceClassification(TFCamembertPreTrainedModel, TFSequenc
 position_ids: np.ndarray | tf.Tensor | None = None,
 head_mask: np.ndarray | tf.Tensor | None = None,
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: np.ndarray | tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
+training: bool | None = False,
+) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
 r"""
 labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
 Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1390,12 +1389,12 @@ class TFCamembertForTokenClassification(TFCamembertPreTrainedModel, TFTokenClass
 position_ids: np.ndarray | tf.Tensor | None = None,
 head_mask: np.ndarray | tf.Tensor | None = None,
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: np.ndarray | tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
+training: bool | None = False,
+) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
 r"""
 labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1482,12 +1481,12 @@ class TFCamembertForMultipleChoice(TFCamembertPreTrainedModel, TFMultipleChoiceL
 position_ids: np.ndarray | tf.Tensor | None = None,
 head_mask: np.ndarray | tf.Tensor | None = None,
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: np.ndarray | tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
+training: bool | None = False,
+) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
 r"""
 labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
 Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1586,13 +1585,13 @@ class TFCamembertForQuestionAnswering(TFCamembertPreTrainedModel, TFQuestionAnsw
 position_ids: np.ndarray | tf.Tensor | None = None,
 head_mask: np.ndarray | tf.Tensor | None = None,
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 start_positions: np.ndarray | tf.Tensor | None = None,
 end_positions: np.ndarray | tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
+training: bool | None = False,
+) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
 r"""
 start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
 Labels for position (index) of the start of the labelled span for computing the token classification loss.
@@ -1706,14 +1705,14 @@ class TFCamembertForCausalLM(TFCamembertPreTrainedModel, TFCausalLanguageModelin
 inputs_embeds: np.ndarray | tf.Tensor | None = None,
 encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
 encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-use_cache: Optional[bool] = None,
-output_attentions: Optional[bool] = None,
-output_hidden_states: Optional[bool] = None,
-return_dict: Optional[bool] = None,
+past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
+use_cache: bool | None = None,
+output_attentions: bool | None = None,
+output_hidden_states: bool | None = None,
+return_dict: bool | None = None,
 labels: np.ndarray | tf.Tensor | None = None,
-training: Optional[bool] = False,
-) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
+training: bool | None = False,
+) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
 r"""
 encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
 Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
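One detail worth noting in `TFCamembertEncoder.call` above: `use_cache: bool | None` carries no default, and, exactly like `Optional[bool]`, the union only widens the accepted type; it never makes the argument optional to pass. A small sketch of the distinction (illustrative function, not the real encoder):

    from __future__ import annotations

    def encoder_call(hidden_states: list[float], use_cache: bool | None) -> list[float]:
        if use_cache is None:
            use_cache = True  # e.g. fall back to a config value
        return hidden_states if use_cache else []

    encoder_call([0.1], None)   # OK: None must still be passed explicitly
    # encoder_call([0.1])       # TypeError: missing required positional argument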
@ -18,7 +18,7 @@ from __future__ import annotations
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Optional, Union from typing import Any
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -55,7 +55,7 @@ LARGE_NEGATIVE = -1e8
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -108,10 +108,10 @@ class TFCLIPOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits_per_image: Optional[tf.Tensor] = None logits_per_image: tf.Tensor | None = None
logits_per_text: Optional[tf.Tensor] = None logits_per_text: tf.Tensor | None = None
text_embeds: Optional[tf.Tensor] = None text_embeds: tf.Tensor | None = None
image_embeds: Optional[tf.Tensor] = None image_embeds: tf.Tensor | None = None
text_model_output: TFBaseModelOutputWithPooling = None text_model_output: TFBaseModelOutputWithPooling = None
vision_model_output: TFBaseModelOutputWithPooling = None vision_model_output: TFBaseModelOutputWithPooling = None
@ -225,9 +225,9 @@ class TFCLIPTextEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
Applies embedding based on inputs tensor. Applies embedding based on inputs tensor.
@ -498,7 +498,7 @@ class TFCLIPEncoder(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -560,7 +560,7 @@ class TFCLIPTextTransformer(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
input_shape = shape_list(input_ids) input_shape = shape_list(input_ids)
embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids) embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids)
@ -677,11 +677,11 @@ class TFCLIPTextMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if input_ids is None: if input_ids is None:
raise ValueError("You have to specify input_ids") raise ValueError("You have to specify input_ids")
@ -728,7 +728,7 @@ class TFCLIPVisionTransformer(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
embedding_output = self.embeddings(pixel_values=pixel_values) embedding_output = self.embeddings(pixel_values=pixel_values)
embedding_output = self.pre_layernorm(inputs=embedding_output) embedding_output = self.pre_layernorm(inputs=embedding_output)
@ -790,11 +790,11 @@ class TFCLIPVisionMainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if pixel_values is None: if pixel_values is None:
raise ValueError("You have to specify pixel_values") raise ValueError("You have to specify pixel_values")
@ -892,9 +892,9 @@ class TFCLIPMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
if input_ids is None: if input_ids is None:
@ -924,9 +924,9 @@ class TFCLIPMainLayer(keras.layers.Layer):
def get_image_features( def get_image_features(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
if pixel_values is None: if pixel_values is None:
@ -952,12 +952,12 @@ class TFCLIPMainLayer(keras.layers.Layer):
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
return_loss: Optional[bool] = None, return_loss: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFCLIPOutput, tuple[tf.Tensor]]: ) -> TFCLIPOutput | tuple[tf.Tensor]:
if input_ids is None: if input_ids is None:
raise ValueError("You have to specify either input_ids") raise ValueError("You have to specify either input_ids")
if pixel_values is None: if pixel_values is None:
@ -1191,11 +1191,11 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel):
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1250,11 +1250,11 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1313,9 +1313,9 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
r""" r"""
@ -1351,9 +1351,9 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
def get_image_features( def get_image_features(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
r""" r"""
@ -1397,12 +1397,12 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
return_loss: Optional[bool] = None, return_loss: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFCLIPOutput, tuple[tf.Tensor]]: ) -> TFCLIPOutput | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
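Editor's note on this file's hunks: rewriting `Optional[bool]` as `bool | None` is safe down to the repository's minimum supported Python because the touched modules open with `from __future__ import annotations` (visible in the import hunks later in this diff), which stores annotations as strings instead of evaluating them. A minimal, self-contained sketch of that mechanism (standalone example, not code from this diff):

    from __future__ import annotations


    def call(output_attentions: bool | None = None, training: bool = False) -> str:
        # Under the future import the annotation is never evaluated at definition
        # time; it is stored as the literal string "bool | None", so the syntax
        # costs nothing even on interpreters without runtime PEP 604 support.
        return call.__annotations__["output_attentions"]


    print(call())  # -> bool | None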
View File
@ -16,8 +16,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -106,10 +104,10 @@ class TFConvBertEmbeddings(keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0, past_key_values_length=0,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
@ -860,16 +858,16 @@ class TFConvBertModel(TFConvBertPreTrainedModel):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None, attention_mask: np.array | tf.Tensor | None = None,
token_type_ids: Optional[Union[np.array, tf.Tensor]] = None, token_type_ids: np.array | tf.Tensor | None = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None, position_ids: np.array | tf.Tensor | None = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None, head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.convbert( outputs = self.convbert(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -995,12 +993,12 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFMaskedLMOutput]: ) -> tuple | TFMaskedLMOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1120,12 +1118,12 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSequenceClassifierOutput]: ) -> tuple | TFSequenceClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1208,12 +1206,12 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFMultipleChoiceModelOutput]: ) -> tuple | TFMultipleChoiceModelOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1316,12 +1314,12 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFTokenClassifierOutput]: ) -> tuple | TFTokenClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1399,13 +1397,13 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: tf.Tensor | None = None, start_positions: tf.Tensor | None = None,
end_positions: tf.Tensor | None = None, end_positions: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFQuestionAnsweringModelOutput]: ) -> tuple | TFQuestionAnsweringModelOutput:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
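One wrinkle worth flagging in the ConvBert hunks: the old `Optional[Union[np.array, tf.Tensor]]` collapses to a single flat chain, `np.array | tf.Tensor | None`, because `Optional[Union[A, B]]` and `A | B | None` name the same type. (The `np.array` spelling itself is a pre-existing quirk carried over unchanged; `np.array` is a function, and `np.ndarray` is the type one would normally annotate with.) A short sketch of the equivalence, assuming a Python 3.10+ interpreter so the `|` form can be built at runtime:

    from typing import Optional, Union

    old = Optional[Union[int, str]]  # typing normalizes this to Union[int, str, None]
    new = int | str | None           # flat PEP 604 spelling
    assert old == new                # the two spellings compare equal on 3.10+
    print(new)                       # int | str | None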
View File
@ -16,8 +16,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -238,7 +236,7 @@ class TFConvNextStage(keras.layers.Layer):
kernel_size: int = 2, kernel_size: int = 2,
stride: int = 2, stride: int = 2,
depth: int = 2, depth: int = 2,
drop_path_rates: Optional[list[float]] = None, drop_path_rates: list[float] | None = None,
**kwargs, **kwargs,
): ):
super().__init__(**kwargs) super().__init__(**kwargs)
@ -365,10 +363,10 @@ class TFConvNextMainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
) )
@ -503,10 +501,10 @@ class TFConvNextModel(TFConvNextPreTrainedModel):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -589,11 +587,11 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
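The ConvNext hunks apply the same rewrite to container types: `Optional[list[float]]` becomes `list[float] | None`. The built-in-generic half (`list[float]`) was already legal in these files for the same reason the union half is, namely the deferred-annotation import. A hedged sketch of the defaulting pattern such a parameter typically pairs with (the body here is illustrative, not lifted from TFConvNextStage):

    from __future__ import annotations


    def stage(depth: int = 2, drop_path_rates: list[float] | None = None) -> list[float]:
        # None means "no stochastic depth": substitute a zero rate per block.
        return drop_path_rates if drop_path_rates is not None else [0.0] * depth


    print(stage())  # [0.0, 0.0]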
View File
@ -16,8 +16,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -279,7 +277,7 @@ class TFConvNextV2Stage(keras.layers.Layer):
kernel_size: int = 2, kernel_size: int = 2,
stride: int = 2, stride: int = 2,
depth: int = 2, depth: int = 2,
drop_path_rates: Optional[list[float]] = None, drop_path_rates: list[float] | None = None,
**kwargs, **kwargs,
): ):
super().__init__(**kwargs) super().__init__(**kwargs)
@ -367,9 +365,9 @@ class TFConvNextV2Encoder(keras.layers.Layer):
def call( def call(
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
output_hidden_states: Optional[bool] = False, output_hidden_states: bool | None = False,
return_dict: Optional[bool] = True, return_dict: bool | None = True,
) -> Union[tuple, TFBaseModelOutputWithNoAttention]: ) -> tuple | TFBaseModelOutputWithNoAttention:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
for i, layer_module in enumerate(self.stages): for i, layer_module in enumerate(self.stages):
@ -411,10 +409,10 @@ class TFConvNextV2MainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
) )
@ -557,10 +555,10 @@ class TFConvNextV2Model(TFConvNextV2PreTrainedModel):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndNoAttention, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndNoAttention | tuple[tf.Tensor]:
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
) )
@ -627,11 +625,11 @@ class TFConvNextV2ForImageClassification(TFConvNextV2PreTrainedModel, TFSequence
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFImageClassifierOutputWithNoAttention, tuple[tf.Tensor]]: ) -> TFImageClassifierOutputWithNoAttention | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
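A consistency note on these hunks: the rewrite is deliberately mechanical, so pre-existing variation survives it. Some `call` signatures use `training: bool = False` while others use `training: bool | None = False`; the latter still defaults to `False` but also admits an explicit `None` from callers. A small sketch of the behavioral difference (function names are illustrative, not from the diff):

    from __future__ import annotations


    def call_strict(training: bool = False) -> bool:
        return training


    def call_lenient(training: bool | None = False) -> bool:
        # Treat an explicit None as "fall back to the default", a common
        # convention in these models' call() methods.
        return False if training is None else training


    print(call_strict(), call_lenient(None))  # False False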
View File
@ -17,8 +17,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -303,18 +301,18 @@ class TFCTRLMainLayer(keras.layers.Layer):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutputWithPast]: ) -> tuple | TFBaseModelOutputWithPast:
# If using past key value states, only the last tokens # If using past key value states, only the last tokens
# should be given as an input # should be given as an input
if past_key_values is not None: if past_key_values is not None:
@ -594,18 +592,18 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutputWithPast]: ) -> tuple | TFBaseModelOutputWithPast:
outputs = self.transformer( outputs = self.transformer(
input_ids=input_ids, input_ids=input_ids,
past_key_values=past_key_values, past_key_values=past_key_values,
@ -722,19 +720,19 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFCausalLMOutputWithPast]: ) -> tuple | TFCausalLMOutputWithPast:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -835,19 +833,19 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSequenceClassifierOutput]: ) -> tuple | TFSequenceClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
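The CTRL hunks exercise the deepest nesting in this diff: `Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]]` becomes `tuple[tuple[np.ndarray | tf.Tensor]] | None`. The trailing `| None` binds to the whole outer tuple, not to the innermost union, so the meaning is unchanged. A sketch with stand-in element types, again assuming Python 3.10+ so both spellings exist at runtime:

    from typing import Optional, Union

    old = Optional[tuple[tuple[Union[int, float]]]]
    new = tuple[tuple[int | float]] | None
    assert old == new  # None alternates with the outer tuple in both spellings
    print(new)         # tuple[tuple[int | float]] | None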
View File
@ -18,7 +18,6 @@ from __future__ import annotations
import collections.abc import collections.abc
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -65,8 +64,8 @@ class TFBaseModelOutputWithCLSToken(ModelOutput):
the initial embedding outputs. the initial embedding outputs.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
cls_token_value: Optional[tf.Tensor] = None cls_token_value: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
@ -766,10 +765,10 @@ class TFCvtEncoder(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: TFModelInputType, pixel_values: TFModelInputType,
output_hidden_states: Optional[bool] = False, output_hidden_states: bool | None = False,
return_dict: Optional[bool] = True, return_dict: bool | None = True,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithCLSToken, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithCLSToken | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
hidden_state = pixel_values hidden_state = pixel_values
# When running on CPU, `keras.layers.Conv2D` doesn't support (batch_size, num_channels, height, width) # When running on CPU, `keras.layers.Conv2D` doesn't support (batch_size, num_channels, height, width)
@ -821,10 +820,10 @@ class TFCvtMainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithCLSToken, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithCLSToken | tuple[tf.Tensor]:
if pixel_values is None: if pixel_values is None:
raise ValueError("You have to specify pixel_values") raise ValueError("You have to specify pixel_values")
@ -929,10 +928,10 @@ class TFCvtModel(TFCvtPreTrainedModel):
def call( def call(
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithCLSToken, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithCLSToken | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1015,10 +1014,10 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFImageClassifierOutputWithNoAttention, tuple[tf.Tensor]]: ) -> TFImageClassifierOutputWithNoAttention | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
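The Cvt hunks also touch dataclass fields (`TFBaseModelOutputWithCLSToken`), not just signatures, and the rewrite is equally safe there: `dataclasses` only records that an annotation exists; it never evaluates the string. A minimal sketch with a stand-in field type (`object` replaces `tf.Tensor` to keep the example dependency-free):

    from __future__ import annotations

    from dataclasses import dataclass


    @dataclass
    class Output:
        last_hidden_state: object | None = None
        cls_token_value: object | None = None
        hidden_states: tuple[object, ...] | None = None


    print(Output())  # every field defaults to None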
View File
@ -19,7 +19,6 @@ from __future__ import annotations
import collections.abc import collections.abc
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -90,8 +89,8 @@ class TFData2VecVisionModelOutputWithPooling(TFBaseModelOutputWithPooling):
heads. heads.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
pooler_output: Optional[tf.Tensor] = None pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -258,7 +257,7 @@ class TFData2VecVisionPatchEmbeddings(keras.layers.Layer):
class TFData2VecVisionSelfAttention(keras.layers.Layer): class TFData2VecVisionSelfAttention(keras.layers.Layer):
def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, **kwargs): def __init__(self, config: Data2VecVisionConfig, window_size: tuple | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
if config.hidden_size % config.num_attention_heads != 0: if config.hidden_size % config.num_attention_heads != 0:
@ -306,7 +305,7 @@ class TFData2VecVisionSelfAttention(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
head_mask: tf.Tensor, head_mask: tf.Tensor,
output_attentions: bool, output_attentions: bool,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None, relative_position_bias: TFData2VecVisionRelativePositionBias | None = None,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
batch_size = shape_list(hidden_states)[0] batch_size = shape_list(hidden_states)[0]
@ -402,7 +401,7 @@ class TFData2VecVisionSelfOutput(keras.layers.Layer):
class TFData2VecVisionAttention(keras.layers.Layer): class TFData2VecVisionAttention(keras.layers.Layer):
def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, **kwargs): def __init__(self, config: Data2VecVisionConfig, window_size: tuple | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.attention = TFData2VecVisionSelfAttention(config, window_size=window_size, name="attention") self.attention = TFData2VecVisionSelfAttention(config, window_size=window_size, name="attention")
@ -416,7 +415,7 @@ class TFData2VecVisionAttention(keras.layers.Layer):
input_tensor: tf.Tensor, input_tensor: tf.Tensor,
head_mask: tf.Tensor, head_mask: tf.Tensor,
output_attentions: bool, output_attentions: bool,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None, relative_position_bias: TFData2VecVisionRelativePositionBias | None = None,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
self_outputs = self.attention( self_outputs = self.attention(
@ -504,7 +503,7 @@ class TFData2VecVisionLayer(keras.layers.Layer):
"""This corresponds to the Block class in the timm implementation.""" """This corresponds to the Block class in the timm implementation."""
def __init__( def __init__(
self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, drop_path_rate: float = 0.0, **kwargs self, config: Data2VecVisionConfig, window_size: tuple | None = None, drop_path_rate: float = 0.0, **kwargs
): ):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
@ -570,7 +569,7 @@ class TFData2VecVisionLayer(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
head_mask: tf.Tensor, head_mask: tf.Tensor,
output_attentions: bool, output_attentions: bool,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None, relative_position_bias: TFData2VecVisionRelativePositionBias | None = None,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
self_attention_outputs = self.attention( self_attention_outputs = self.attention(
@ -667,7 +666,7 @@ class TFData2VecVisionRelativePositionBias(keras.layers.Layer):
class TFData2VecVisionEncoder(keras.layers.Layer): class TFData2VecVisionEncoder(keras.layers.Layer):
def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, **kwargs): def __init__(self, config: Data2VecVisionConfig, window_size: tuple | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
if config.use_shared_relative_position_bias: if config.use_shared_relative_position_bias:
@ -696,7 +695,7 @@ class TFData2VecVisionEncoder(keras.layers.Layer):
output_attentions: bool = False, output_attentions: bool = False,
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = True, return_dict: bool = True,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None all_self_attentions = () if output_attentions else None
@ -783,11 +782,11 @@ class TFData2VecVisionMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFData2VecVisionModelOutputWithPooling]: ) -> tuple | TFData2VecVisionModelOutputWithPooling:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -995,11 +994,11 @@ class TFData2VecVisionModel(TFData2VecVisionPreTrainedModel):
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFData2VecVisionModelOutputWithPooling]: ) -> tuple | TFData2VecVisionModelOutputWithPooling:
r""" r"""
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*): bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1059,12 +1058,12 @@ class TFData2VecVisionForImageClassification(TFData2VecVisionPreTrainedModel, TF
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple]: ) -> TFSequenceClassifierOutput | tuple:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
@ -1121,10 +1120,10 @@ class TFData2VecVisionConvModule(keras.layers.Layer):
self, self,
in_channels: int, in_channels: int,
out_channels: int, out_channels: int,
kernel_size: Union[int, tuple[int, int]], kernel_size: int | tuple[int, int],
padding: str = "valid", padding: str = "valid",
bias: bool = False, bias: bool = False,
dilation: Union[int, tuple[int, int]] = 1, dilation: int | tuple[int, int] = 1,
**kwargs, **kwargs,
) -> None: ) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
@ -1462,7 +1461,7 @@ class TFData2VecVisionFCNHead(keras.layers.Layer):
config: Data2VecVisionConfig, config: Data2VecVisionConfig,
in_index: int = 2, in_index: int = 2,
kernel_size: int = 3, kernel_size: int = 3,
dilation: Union[int, tuple[int, int]] = 1, dilation: int | tuple[int, int] = 1,
**kwargs, **kwargs,
) -> None: ) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
@ -1599,10 +1598,10 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
) -> Union[tuple, TFSemanticSegmenterOutput]: ) -> tuple | TFSemanticSegmenterOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ..., Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
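The Data2VecVision hunks rewrite `Union[int, tuple[int, int]]` parameters such as `kernel_size` and `dilation` to `int | tuple[int, int]`. A short sketch of how such a parameter is typically consumed (the helper name is illustrative, not from the diff):

    from __future__ import annotations


    def normalize_size(kernel_size: int | tuple[int, int]) -> tuple[int, int]:
        # A bare int is shorthand for a square kernel.
        if isinstance(kernel_size, int):
            return (kernel_size, kernel_size)
        return kernel_size


    print(normalize_size(3), normalize_size((3, 5)))  # (3, 3) (3, 5)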
View File
@ -18,7 +18,6 @@ from __future__ import annotations
import math import math
from collections.abc import Sequence from collections.abc import Sequence
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -208,9 +207,9 @@ class TFDebertaAttention(keras.layers.Layer):
self, self,
input_tensor: tf.Tensor, input_tensor: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
rel_embeddings: Optional[tf.Tensor] = None, rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
@ -319,9 +318,9 @@ class TFDebertaLayer(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
rel_embeddings: Optional[tf.Tensor] = None, rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
@ -409,13 +408,13 @@ class TFDebertaEncoder(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = True, return_dict: bool = True,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -651,9 +650,9 @@ class TFDebertaDisentangledSelfAttention(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
rel_embeddings: Optional[tf.Tensor] = None, rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
@ -881,11 +880,11 @@ class TFDebertaEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
mask: Optional[tf.Tensor] = None, mask: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
@ -1074,11 +1073,11 @@ class TFDebertaMainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None: elif input_ids is not None:
@ -1255,11 +1254,11 @@ class TFDebertaModel(TFDebertaPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.deberta( outputs = self.deberta(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1314,12 +1313,12 @@ class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLos
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1404,12 +1403,12 @@ class TFDebertaForSequenceClassification(TFDebertaPreTrainedModel, TFSequenceCla
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1497,12 +1496,12 @@ class TFDebertaForTokenClassification(TFDebertaPreTrainedModel, TFTokenClassific
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1579,13 +1578,13 @@ class TFDebertaForQuestionAnswering(TFDebertaPreTrainedModel, TFQuestionAnswerin
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
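One caveat that applies to every file in this diff: the future import defers evaluation, it does not remove it. Anything that later materializes these hints (for example `typing.get_type_hints`) must run on an interpreter where `X | None` is executable for arbitrary classes, i.e. Python 3.10+. A sketch of where that line falls, with a stand-in class for `tf.Tensor`:

    from __future__ import annotations

    import typing


    class Tensor:  # stand-in for tf.Tensor
        pass


    def call(mask: Tensor | None = None) -> None: ...


    # Fine on 3.10+; on 3.9 the stored string "Tensor | None" raises TypeError
    # when get_type_hints tries to evaluate it.
    print(typing.get_type_hints(call))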
View File
@ -16,8 +16,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -192,9 +190,9 @@ class TFDebertaV2Attention(keras.layers.Layer):
self, self,
input_tensor: tf.Tensor, input_tensor: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
rel_embeddings: Optional[tf.Tensor] = None, rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
@ -306,9 +304,9 @@ class TFDebertaV2Layer(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
rel_embeddings: Optional[tf.Tensor] = None, rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
@ -485,13 +483,13 @@ class TFDebertaV2Encoder(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = True, return_dict: bool = True,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
if len(shape_list(attention_mask)) <= 2: if len(shape_list(attention_mask)) <= 2:
input_mask = attention_mask input_mask = attention_mask
else: else:
@ -718,9 +716,9 @@ class TFDebertaV2DisentangledSelfAttention(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None, query_states: tf.Tensor | None = None,
relative_pos: Optional[tf.Tensor] = None, relative_pos: tf.Tensor | None = None,
rel_embeddings: Optional[tf.Tensor] = None, rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
@ -985,11 +983,11 @@ class TFDebertaV2Embeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
mask: Optional[tf.Tensor] = None, mask: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
@ -1181,11 +1179,11 @@ class TFDebertaV2MainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None: elif input_ids is not None:
@ -1364,11 +1362,11 @@ class TFDebertaV2Model(TFDebertaV2PreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.deberta( outputs = self.deberta(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1424,12 +1422,12 @@ class TFDebertaV2ForMaskedLM(TFDebertaV2PreTrainedModel, TFMaskedLanguageModelin
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1515,12 +1513,12 @@ class TFDebertaV2ForSequenceClassification(TFDebertaV2PreTrainedModel, TFSequenc
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1609,12 +1607,12 @@ class TFDebertaV2ForTokenClassification(TFDebertaV2PreTrainedModel, TFTokenClass
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1692,13 +1690,13 @@ class TFDebertaV2ForQuestionAnswering(TFDebertaV2PreTrainedModel, TFQuestionAnsw
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1793,12 +1791,12 @@ class TFDebertaV2ForMultipleChoice(TFDebertaV2PreTrainedModel, TFMultipleChoiceL
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
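The DeBERTa-v2 hunks above all follow the same mechanical rewrite: `Optional[X]` becomes `X | None` in parameters and `Union[A, B]` becomes `A | B` in return types, with `from __future__ import annotations` keeping the new syntax valid as a deferred string on interpreters older than 3.10. A minimal self-contained sketch of the pattern (the function name and arguments here are stand-ins, not the real transformers API):

from __future__ import annotations  # `X | None` stays an unevaluated string on Python < 3.10

# Before:
#   def call(self, return_dict: Optional[bool] = None,
#            training: Optional[bool] = False) -> Union[dict, tuple]: ...
# After (written as a free function so the sketch runs by itself):
def call(return_dict: bool | None = None, training: bool | None = False) -> dict | tuple:
    # `bool | None = False` is carried over verbatim from the old signatures;
    # if None is never a meaningful value for `training`, plain `bool = False`
    # would be the tighter annotation.
    if return_dict is None:
        return_dict = True
    out = {"training": bool(training)}
    return out if return_dict else tuple(out.items())


print(call())                   # {'training': False}
print(call(return_dict=False))  # (('training', False),)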

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import collections.abc import collections.abc
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -88,9 +87,9 @@ class TFDeiTForImageClassificationWithTeacherOutput(ModelOutput):
the self-attention heads. the self-attention heads.
""" """
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
cls_logits: Optional[tf.Tensor] = None cls_logits: tf.Tensor | None = None
distillation_logits: Optional[tf.Tensor] = None distillation_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -550,7 +549,7 @@ class TFDeiTEncoder(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -630,12 +629,12 @@ class TFDeiTMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor, ...]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor, ...]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -780,12 +779,12 @@ class TFDeiTModel(TFDeiTPreTrainedModel):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
training: bool = False, training: bool = False,
) -> Union[tuple, TFBaseModelOutputWithPooling]: ) -> tuple | TFBaseModelOutputWithPooling:
outputs = self.deit( outputs = self.deit(
pixel_values=pixel_values, pixel_values=pixel_values,
bool_masked_pos=bool_masked_pos, bool_masked_pos=bool_masked_pos,
@ -910,12 +909,12 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
training: bool = False, training: bool = False,
) -> Union[tuple, TFMaskedImageModelingOutput]: ) -> tuple | TFMaskedImageModelingOutput:
r""" r"""
bool_masked_pos (`tf.Tensor` of type bool and shape `(batch_size, num_patches)`): bool_masked_pos (`tf.Tensor` of type bool and shape `(batch_size, num_patches)`):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1046,12 +1045,12 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
training: bool = False, training: bool = False,
) -> Union[tf.Tensor, TFImageClassifierOutput]: ) -> tf.Tensor | TFImageClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
@ -1171,12 +1170,12 @@ class TFDeiTForImageClassificationWithTeacher(TFDeiTPreTrainedModel):
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
training: bool = False, training: bool = False,
) -> Union[tuple, TFDeiTForImageClassificationWithTeacherOutput]: ) -> tuple | TFDeiTForImageClassificationWithTeacherOutput:
return_dict = return_dict if return_dict is not None else self.config.use_return_dict return_dict = return_dict if return_dict is not None else self.config.use_return_dict
outputs = self.deit( outputs = self.deit(
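The DeiT output classes show the same rewrite on `@dataclass` fields. Because the module opens with `from __future__ import annotations`, each annotation is stored as a string and never evaluated when the class is created, so `tf.Tensor | None` is safe even on pre-3.10 interpreters. A hedged sketch with a stand-in tensor class so it runs without TensorFlow:

from __future__ import annotations  # field annotations stay strings; `| None` is never executed

from dataclasses import dataclass


class Tensor:  # stand-in for tf.Tensor
    pass


@dataclass
class ImageClassifierOutput:  # hypothetical analogue of the TFDeiT output classes
    # Before: logits: Optional[Tensor] = None
    logits: Tensor | None = None
    cls_logits: Tensor | None = None
    hidden_states: tuple[Tensor, ...] | None = None


out = ImageClassifierOutput(logits=Tensor())
print(out.cls_logits)  # None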

View File

@ -20,7 +20,6 @@ TF 2.0 Transformer XL model.
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -543,9 +542,9 @@ class TFTransfoXLMainLayer(keras.layers.Layer):
mems: list[tf.Tensor] | None = None, mems: list[tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
): ):
@ -690,7 +689,7 @@ class TFTransfoXLModelOutput(ModelOutput):
heads. heads.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
mems: list[tf.Tensor] = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -723,7 +722,7 @@ class TFTransfoXLLMHeadModelOutput(ModelOutput):
heads. heads.
""" """
prediction_scores: Optional[tf.Tensor] = None prediction_scores: tf.Tensor | None = None
mems: list[tf.Tensor] = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -757,7 +756,7 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mems: list[tf.Tensor] = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -1047,12 +1046,12 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc
mems: list[tf.Tensor] | None = None, mems: list[tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFTransfoXLSequenceClassifierOutputWithPast]: ) -> tuple | TFTransfoXLSequenceClassifierOutputWithPast:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
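The `mems` fields in the three Transfo-XL output classes above default to `None`, and PEP 484 has no implicit Optional for plain defaults, so the annotation has to spell it out as `list[tf.Tensor] | None = None`; type checkers such as mypy and pyright reject a bare `list[tf.Tensor] = None`. A sketch of the corrected field with a stand-in tensor type:

from __future__ import annotations

from dataclasses import dataclass


class Tensor:  # stand-in for tf.Tensor
    pass


@dataclass
class TransfoXLModelOutput:  # hypothetical analogue of the output classes above
    last_hidden_state: Tensor | None = None
    # Before: mems: list[Tensor] = None  -- a None default under a
    # non-optional annotation, which checkers flag since implicit
    # Optional was removed from PEP 484.
    mems: list[Tensor] | None = None


print(TransfoXLModelOutput().mems)  # None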

View File

@ -19,7 +19,6 @@ TF 2.0 DistilBERT model
from __future__ import annotations from __future__ import annotations
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -594,11 +593,11 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.distilbert( outputs = self.distilbert(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -697,12 +696,12 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -794,12 +793,12 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -882,12 +881,12 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -969,12 +968,12 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1071,13 +1070,13 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
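Every DistilBERT rewrite above sits in annotation position, which is why `from __future__ import annotations` suffices. The caveat, sketched below, is that code which *evaluates* annotations at runtime still needs the `|` operator on types: `typing.get_type_hints` over `X | None` works from Python 3.10 onward, and on earlier interpreters evaluating the stored string raises `TypeError`.

from __future__ import annotations  # parsing the annotation is deferred...

import sys
import typing


def f(x: int | None = None) -> str | None:
    return None if x is None else str(x)


print(f(3))  # '3' -- calling the function never touches the annotations

# ...but evaluating them requires the | operator on types, added in 3.10:
if sys.version_info >= (3, 10):
    print(typing.get_type_hints(f))  # {'x': int | None, 'return': str | None}
else:
    print("typing.get_type_hints(f) would raise TypeError on", sys.version)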

View File

@ -18,7 +18,6 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -68,7 +67,7 @@ class TFDPRContextEncoderOutput(ModelOutput):
heads. heads.
""" """
pooler_output: Optional[tf.Tensor] = None pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -96,7 +95,7 @@ class TFDPRQuestionEncoderOutput(ModelOutput):
heads. heads.
""" """
pooler_output: Optional[tf.Tensor] = None pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -127,9 +126,9 @@ class TFDPRReaderOutput(ModelOutput):
heads. heads.
""" """
start_logits: Optional[tf.Tensor] = None start_logits: tf.Tensor | None = None
end_logits: Optional[tf.Tensor] = None end_logits: tf.Tensor | None = None
relevance_logits: Optional[tf.Tensor] = None relevance_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -155,15 +154,15 @@ class TFDPREncoderLayer(keras.layers.Layer):
@unpack_inputs @unpack_inputs
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor, ...]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor, ...]:
outputs = self.bert_model( outputs = self.bert_model(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -226,14 +225,14 @@ class TFDPRSpanPredictorLayer(keras.layers.Layer):
@unpack_inputs @unpack_inputs
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: bool = False, output_attentions: bool = False,
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = False, return_dict: bool = False,
training: bool = False, training: bool = False,
) -> Union[TFDPRReaderOutput, tuple[tf.Tensor, ...]]: ) -> TFDPRReaderOutput | tuple[tf.Tensor, ...]:
# notations: N - number of questions in a batch, M - number of passages per question, L - sequence length # notations: N - number of questions in a batch, M - number of passages per question, L - sequence length
n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2] n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2]
# feed encoder # feed encoder
@ -296,7 +295,7 @@ class TFDPRSpanPredictor(TFPreTrainedModel):
@unpack_inputs @unpack_inputs
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
@ -304,7 +303,7 @@ class TFDPRSpanPredictor(TFPreTrainedModel):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = False, return_dict: bool = False,
training: bool = False, training: bool = False,
) -> Union[TFDPRReaderOutput, tuple[tf.Tensor, ...]]: ) -> TFDPRReaderOutput | tuple[tf.Tensor, ...]:
outputs = self.encoder( outputs = self.encoder(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -329,7 +328,7 @@ class TFDPREncoder(TFPreTrainedModel):
@unpack_inputs @unpack_inputs
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
@ -337,7 +336,7 @@ class TFDPREncoder(TFPreTrainedModel):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = False, return_dict: bool = False,
training: bool = False, training: bool = False,
) -> Union[TFDPRReaderOutput, tuple[tf.Tensor, ...]]: ) -> TFDPRReaderOutput | tuple[tf.Tensor, ...]:
outputs = self.encoder( outputs = self.encoder(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
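Before this commit, the DPR signatures mixed both spellings inside a single `call` (`input_ids: Optional[tf.Tensor] = None` next to `attention_mask: tf.Tensor | None = None`), so the rewrite's main payoff here is consistency rather than new meaning. A sketch of the unified shape, with stand-in types in place of the real API:

from __future__ import annotations


class Tensor:  # stand-in for tf.Tensor
    pass


# Before: one signature, two styles --
#   input_ids: Optional[Tensor] = None,
#   attention_mask: Tensor | None = None,
# After: one spelling everywhere.
def call(
    input_ids: Tensor | None = None,
    attention_mask: Tensor | None = None,
    return_dict: bool = False,
) -> tuple[Tensor, ...] | dict[str, Tensor]:
    outputs = tuple(t for t in (input_ids, attention_mask) if t is not None)
    return {"first": outputs[0]} if return_dict and outputs else outputs


print(call(input_ids=Tensor()))  # a one-element tuple of Tensor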

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math import math
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -461,12 +460,12 @@ class TFElectraEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None, encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None, encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None, past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool], use_cache: bool | None,
output_attentions: bool, output_attentions: bool,
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -601,10 +600,10 @@ class TFElectraEmbeddings(keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0, past_key_values_length=0,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
@ -806,13 +805,13 @@ class TFElectraMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
@ -931,7 +930,7 @@ class TFElectraForPreTrainingOutput(ModelOutput):
heads. heads.
""" """
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -1057,13 +1056,13 @@ class TFElectraModel(TFElectraPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1139,11 +1138,11 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFElectraForPreTrainingOutput, tuple[tf.Tensor]]: ) -> TFElectraForPreTrainingOutput | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1281,12 +1280,12 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1410,12 +1409,12 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1496,12 +1495,12 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss)
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1607,12 +1606,12 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1695,13 +1694,13 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
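The Electra `past_key_values` parameter is the most involved rewrite in this stretch: `Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]]` becomes `tuple[tuple[np.ndarray | tf.Tensor]] | None`. The conversion works inside-out: rewrite the innermost `Union` first, then express each enclosing `Optional` as a trailing `| None`. A sketch with stand-in types:

from __future__ import annotations


class Array:   # stand-in for np.ndarray
    pass


class Tensor:  # stand-in for tf.Tensor
    pass


# Before: past_key_values: Optional[tuple[tuple[Union[Array, Tensor]]]] = None
# After, rewritten inside-out (Union first, Optional last):
def call(
    past_key_values: tuple[tuple[Array | Tensor]] | None = None,
    use_cache: bool | None = None,
) -> int:
    # Only the outer `| None` marks the whole argument as optional; the inner
    # union still reads "each cached entry is an Array or a Tensor".
    return 0 if past_key_values is None else len(past_key_values)


print(call())                                # 0
print(call(past_key_values=((Tensor(),),)))  # 1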

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import inspect import inspect
import re import re
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -204,9 +203,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
def __init__( def __init__(
self, self,
config: Optional[PretrainedConfig] = None, config: PretrainedConfig | None = None,
encoder: Optional[TFPreTrainedModel] = None, encoder: TFPreTrainedModel | None = None,
decoder: Optional[TFPreTrainedModel] = None, decoder: TFPreTrainedModel | None = None,
): ):
if config is None and (encoder is None or decoder is None): if config is None and (encoder is None or decoder is None):
raise ValueError("Either a configuration or an encoder and a decoder has to be provided.") raise ValueError("Either a configuration or an encoder and a decoder has to be provided.")
@ -311,8 +310,8 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
@classmethod @classmethod
def from_encoder_decoder_pretrained( def from_encoder_decoder_pretrained(
cls, cls,
encoder_pretrained_model_name_or_path: Optional[str] = None, encoder_pretrained_model_name_or_path: str | None = None,
decoder_pretrained_model_name_or_path: Optional[str] = None, decoder_pretrained_model_name_or_path: str | None = None,
*model_args, *model_args,
**kwargs, **kwargs,
) -> TFPreTrainedModel: ) -> TFPreTrainedModel:
@ -465,13 +464,13 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs, **kwargs,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]: ) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:

View File

@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import os import os
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -331,7 +330,7 @@ class TFEsmSelfAttention(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None = None, encoder_hidden_states: tf.Tensor | None = None,
encoder_attention_mask: tf.Tensor | None = None, encoder_attention_mask: tf.Tensor | None = None,
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
mixed_query_layer = self.query(hidden_states) mixed_query_layer = self.query(hidden_states)
@ -934,13 +933,13 @@ class TFEsmMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
@ -1117,13 +1116,13 @@ class TFEsmModel(TFEsmPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1222,11 +1221,11 @@ class TFEsmForMaskedLM(TFEsmPreTrainedModel, TFMaskedLanguageModelingLoss):
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1370,11 +1369,11 @@ class TFEsmForSequenceClassification(TFEsmPreTrainedModel, TFSequenceClassificat
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1457,11 +1456,11 @@ class TFEsmForTokenClassification(TFEsmPreTrainedModel, TFTokenClassificationLos
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

View File

@ -17,7 +17,7 @@ from __future__ import annotations
from collections.abc import Sequence from collections.abc import Sequence
from functools import cache from functools import cache
from typing import Any, Callable, Optional from typing import Any, Callable
import numpy as np import numpy as np
import torch import torch
@ -78,8 +78,8 @@ def rot_vec_mul(r: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
@cache @cache
def identity_rot_mats( def identity_rot_mats(
batch_dims: tuple[int, ...], batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None, dtype: torch.dtype | None = None,
device: Optional[torch.device] = None, device: torch.device | None = None,
requires_grad: bool = True, requires_grad: bool = True,
) -> torch.Tensor: ) -> torch.Tensor:
rots = torch.eye(3, dtype=dtype, device=device, requires_grad=requires_grad) rots = torch.eye(3, dtype=dtype, device=device, requires_grad=requires_grad)
@ -93,8 +93,8 @@ def identity_rot_mats(
@cache @cache
def identity_trans( def identity_trans(
batch_dims: tuple[int, ...], batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None, dtype: torch.dtype | None = None,
device: Optional[torch.device] = None, device: torch.device | None = None,
requires_grad: bool = True, requires_grad: bool = True,
) -> torch.Tensor: ) -> torch.Tensor:
trans = torch.zeros((*batch_dims, 3), dtype=dtype, device=device, requires_grad=requires_grad) trans = torch.zeros((*batch_dims, 3), dtype=dtype, device=device, requires_grad=requires_grad)
@ -104,8 +104,8 @@ def identity_trans(
@cache @cache
def identity_quats( def identity_quats(
batch_dims: tuple[int, ...], batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None, dtype: torch.dtype | None = None,
device: Optional[torch.device] = None, device: torch.device | None = None,
requires_grad: bool = True, requires_grad: bool = True,
) -> torch.Tensor: ) -> torch.Tensor:
quat = torch.zeros((*batch_dims, 4), dtype=dtype, device=device, requires_grad=requires_grad) quat = torch.zeros((*batch_dims, 4), dtype=dtype, device=device, requires_grad=requires_grad)
@ -260,8 +260,8 @@ class Rotation:
def __init__( def __init__(
self, self,
rot_mats: Optional[torch.Tensor] = None, rot_mats: torch.Tensor | None = None,
quats: Optional[torch.Tensor] = None, quats: torch.Tensor | None = None,
normalize_quats: bool = True, normalize_quats: bool = True,
): ):
""" """
@ -295,8 +295,8 @@ class Rotation:
@staticmethod @staticmethod
def identity( def identity(
shape, shape,
dtype: Optional[torch.dtype] = None, dtype: torch.dtype | None = None,
device: Optional[torch.device] = None, device: torch.device | None = None,
requires_grad: bool = True, requires_grad: bool = True,
fmt: str = "quat", fmt: str = "quat",
) -> Rotation: ) -> Rotation:
@ -682,7 +682,7 @@ class Rotation:
else: else:
raise ValueError("Both rotations are None") raise ValueError("Both rotations are None")
def to(self, device: Optional[torch.device], dtype: Optional[torch.dtype]) -> Rotation: def to(self, device: torch.device | None, dtype: torch.dtype | None) -> Rotation:
""" """
Analogous to the to() method of torch Tensors Analogous to the to() method of torch Tensors
@ -734,7 +734,7 @@ class Rigid:
dimensions of its component parts. dimensions of its component parts.
""" """
def __init__(self, rots: Optional[Rotation], trans: Optional[torch.Tensor]): def __init__(self, rots: Rotation | None, trans: torch.Tensor | None):
""" """
Args: Args:
rots: A [*, 3, 3] rotation tensor rots: A [*, 3, 3] rotation tensor
@ -786,8 +786,8 @@ class Rigid:
@staticmethod @staticmethod
def identity( def identity(
shape: tuple[int, ...], shape: tuple[int, ...],
dtype: Optional[torch.dtype] = None, dtype: torch.dtype | None = None,
device: Optional[torch.device] = None, device: torch.device | None = None,
requires_grad: bool = True, requires_grad: bool = True,
fmt: str = "quat", fmt: str = "quat",
) -> Rigid: ) -> Rigid:
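The rigid-body utilities show the same rewrite outside the TensorFlow files, on `@cache`-decorated constructors whose `dtype`/`device` arguments default to `None`. `functools.cache` keys on the argument tuple, and both `None` and `tuple[int, ...]` shapes are hashable, so the notation change is invisible at runtime. A torch-free sketch of the pattern (`identity_vector` is a made-up stand-in for `identity_rot_mats`):

from __future__ import annotations

from functools import cache


@cache
def identity_vector(
    batch_dims: tuple[int, ...],
    fill: float | None = None,  # was: Optional[float] = None
) -> tuple[float, ...]:
    # cache() hashes the argument tuple; the new annotation spelling
    # changes nothing about how the memoization behaves.
    value = 0.0 if fill is None else fill
    size = 1
    for dim in batch_dims:
        size *= dim
    return (value,) * size


print(identity_vector((2, 3)))                             # six zeros
print(identity_vector((2, 3)) is identity_vector((2, 3)))  # True: served from the cache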

View File

@@ -22,7 +22,6 @@ import itertools
 import random
 import warnings
 from dataclasses import dataclass
-from typing import Optional, Union

 import numpy as np
 import tensorflow as tf
@@ -261,14 +260,14 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel):
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[tuple, TFBaseModelOutput]:
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> tuple | TFBaseModelOutput:
         outputs = self.transformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -544,14 +543,14 @@ class TFFlaubertMainLayer(keras.layers.Layer):
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[tuple, TFBaseModelOutput]:
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> tuple | TFBaseModelOutput:
         # removed: src_enc=None, src_len=None
         if input_ids is not None and inputs_embeds is not None:
@@ -808,7 +807,7 @@ class TFFlaubertWithLMHeadModelOutput(ModelOutput):
             heads.
     """

-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -864,14 +863,14 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel):
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[tuple, TFFlaubertWithLMHeadModelOutput]:
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> tuple | TFFlaubertWithLMHeadModelOutput:
         transformer_outputs = self.transformer(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -940,15 +939,15 @@ class TFFlaubertForSequenceClassification(TFFlaubertPreTrainedModel, TFSequenceC
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
+    ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1031,16 +1030,16 @@ class TFFlaubertForQuestionAnsweringSimple(TFFlaubertPreTrainedModel, TFQuestion
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         start_positions: np.ndarray | tf.Tensor | None = None,
         end_positions: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
+    ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
         r"""
         start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
@@ -1138,15 +1137,15 @@ class TFFlaubertForTokenClassification(TFFlaubertPreTrainedModel, TFTokenClassif
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
+    ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1251,15 +1250,15 @@ class TFFlaubertForMultipleChoice(TFFlaubertPreTrainedModel, TFMultipleChoiceLos
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         lengths: np.ndarray | tf.Tensor | None = None,
-        cache: Optional[dict[str, tf.Tensor]] = None,
+        cache: dict[str, tf.Tensor] | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
+    ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
         if input_ids is not None:
             num_choices = shape_list(input_ids)[1]
             seq_length = shape_list(input_ids)[2]
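Because these modules start with `from __future__ import annotations`, none of the rewritten `X | None` annotations is evaluated at import time; they are stored as strings. A minimal sketch of that behavior, assuming only the standard library (`f` is a hypothetical function, not one from this diff):

    from __future__ import annotations

    import typing

    def f(cache: dict[str, int] | None = None) -> int | None:
        return None if cache is None else len(cache)

    # Stored as a plain string; cheap and version-independent:
    print(f.__annotations__["cache"])          # "dict[str, int] | None"
    # Resolving it evaluates the `|` expression, which needs Python >= 3.10:
    print(typing.get_type_hints(f)["cache"])   # dict[str, int] | None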

View File

@@ -18,7 +18,6 @@ from __future__ import annotations
 import warnings
 from dataclasses import dataclass
-from typing import Optional, Union

 import numpy as np
 import tensorflow as tf
@@ -1104,7 +1103,7 @@ class TFFunnelForPreTrainingOutput(ModelOutput):
             heads.
     """

-    logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -1224,11 +1223,11 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFBaseModelOutput]:
+    ) -> tuple[tf.Tensor] | TFBaseModelOutput:
         return self.funnel(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -1280,11 +1279,11 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFBaseModelOutput]:
+    ) -> tuple[tf.Tensor] | TFBaseModelOutput:
         return self.funnel(
             input_ids=input_ids,
             attention_mask=attention_mask,
@@ -1336,12 +1335,12 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
         **kwargs,
-    ) -> Union[tuple[tf.Tensor], TFFunnelForPreTrainingOutput]:
+    ) -> tuple[tf.Tensor] | TFFunnelForPreTrainingOutput:
         r"""
         Returns:
@@ -1426,12 +1425,12 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFMaskedLMOutput]:
+    ) -> tuple[tf.Tensor] | TFMaskedLMOutput:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1509,12 +1508,12 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFSequenceClassifierOutput]:
+    ) -> tuple[tf.Tensor] | TFSequenceClassifierOutput:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1598,12 +1597,12 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFMultipleChoiceModelOutput]:
+    ) -> tuple[tf.Tensor] | TFMultipleChoiceModelOutput:
         r"""
         labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1705,12 +1704,12 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFTokenClassifierOutput]:
+    ) -> tuple[tf.Tensor] | TFTokenClassifierOutput:
         r"""
         labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1793,13 +1792,13 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         start_positions: np.ndarray | tf.Tensor | None = None,
         end_positions: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
-    ) -> Union[tuple[tf.Tensor], TFQuestionAnsweringModelOutput]:
+    ) -> tuple[tf.Tensor] | TFQuestionAnsweringModelOutput:
         r"""
         start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
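The return annotations above become `tuple[tf.Tensor] | TFMaskedLMOutput`-style unions. Unlike `Union[...]`, PEP 604 unions are first-class runtime objects on Python 3.10+, so they also work directly with `isinstance`. A small self-contained illustration (assumes Python 3.10+, no TensorFlow needed):

    result: object = (1.0,)
    # Equivalent to isinstance(result, (tuple, dict)), but matches the
    # annotation syntax one-to-one:
    if isinstance(result, tuple | dict):
        print("got a tuple or dict:", result)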

View File

@@ -18,7 +18,6 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import Optional, Union

 import numpy as np
 import tensorflow as tf
@@ -396,7 +395,7 @@ class TFGPT2MainLayer(keras.layers.Layer):
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
@@ -404,12 +403,12 @@ class TFGPT2MainLayer(keras.layers.Layer):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
@@ -628,8 +627,8 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput):
             heads.
     """

-    logits: Optional[tf.Tensor] = None
-    mc_logits: Optional[tf.Tensor] = None
+    logits: tf.Tensor | None = None
+    mc_logits: tf.Tensor | None = None
     past_key_values: list[tf.Tensor] | None = None
     hidden_states: tuple[tf.Tensor] | None = None
     attentions: tuple[tf.Tensor] | None = None
@@ -764,7 +763,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
@@ -772,12 +771,12 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
         r"""
         encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -880,7 +879,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
@@ -888,13 +887,13 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
         encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
         r"""
         encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -991,19 +990,19 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         mc_token_ids: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFGPT2DoubleHeadsModelOutput, tuple[tf.Tensor]]:
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFGPT2DoubleHeadsModelOutput | tuple[tf.Tensor]:
         r"""
         mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
             Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
@@ -1145,19 +1144,19 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
    def call(
        self,
        input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
        attention_mask: np.ndarray | tf.Tensor | None = None,
        token_type_ids: np.ndarray | tf.Tensor | None = None,
        position_ids: np.ndarray | tf.Tensor | None = None,
        head_mask: np.ndarray | tf.Tensor | None = None,
        inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
        labels: np.ndarray | tf.Tensor | None = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFSequenceClassifierOutputWithPast, tuple[tf.Tensor]]:
+        training: bool | None = False,
+    ) -> TFSequenceClassifierOutputWithPast | tuple[tf.Tensor]:
        r"""
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
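The `past_key_values` rewrite shows how `| None` composes with nested generics: it binds to the whole subscripted type, so `tuple[tuple[np.ndarray | tf.Tensor]] | None` needs no parentheses. A hedged sketch with numpy only (`step` is hypothetical); note the caveat in the comment about eager aliases:

    from __future__ import annotations  # defers annotations, but NOT ordinary expressions

    import numpy as np

    def step(past: tuple[tuple[np.ndarray]] | None = None) -> tuple[tuple[np.ndarray]]:
        # `| None` applies to the entire tuple[tuple[...]] generic.
        if past is None:
            past = ((np.zeros(2),),)
        return past

    # Caution: an eager alias like
    #     PastKeyValues = tuple[tuple[np.ndarray]] | None
    # is an ordinary expression, evaluated immediately, and so needs Python >= 3.10.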

View File

@@ -16,8 +16,6 @@
 from __future__ import annotations

-from typing import Optional, Union
-
 import numpy as np
 import tensorflow as tf
@@ -203,7 +201,7 @@ class TFGPTJAttention(keras.layers.Layer):
     def call(
         self,
         hidden_states: tf.Tensor,
-        layer_past: Optional[tuple[tf.Tensor, tf.Tensor]] = None,
+        layer_past: tuple[tf.Tensor, tf.Tensor] | None = None,
         attention_mask: tf.Tensor | None = None,
         position_ids: tf.Tensor | None = None,
         head_mask: tf.Tensor | None = None,
@@ -428,7 +426,7 @@ class TFGPTJMainLayer(keras.layers.Layer):
         output_hidden_states=None,
         return_dict=None,
         training=False,
-    ) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
@@ -694,18 +692,18 @@ class TFGPTJModel(TFGPTJPreTrainedModel):
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
         r"""
         use_cache (`bool`, *optional*, defaults to `True`):
             If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
@@ -794,19 +792,19 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss):
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFCausalLMOutputWithPast, tuple[tf.Tensor]]:
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFCausalLMOutputWithPast | tuple[tf.Tensor]:
         r"""
         labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, input_ids_length)`, *optional*):
             Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
@@ -902,19 +900,19 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
         head_mask: np.ndarray | tf.Tensor | None = None,
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         labels: np.ndarray | tf.Tensor | None = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFSequenceClassifierOutputWithPast, tuple[tf.Tensor]]:
+        use_cache: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFSequenceClassifierOutputWithPast | tuple[tf.Tensor]:
         r"""
         labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1019,7 +1017,7 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss)
     def call(
         self,
         input_ids: TFModelInputType | None = None,
-        past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         token_type_ids: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
@@ -1027,11 +1025,11 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss)
         inputs_embeds: np.ndarray | tf.Tensor | None = None,
         start_positions: np.ndarray | tf.Tensor | None = None,
         end_positions: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
         r"""
         start_positions (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
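The ModelOutput subclasses in these files are dataclasses, and their fields take the same `X | None = None` form. A minimal stand-in (hypothetical `TinyOutput`, not the transformers class) showing why this is safe even on interpreters that cannot evaluate `|` between types: dataclasses read annotations as strings and never resolve them.

    from __future__ import annotations

    from dataclasses import dataclass

    @dataclass
    class TinyOutput:
        logits: object | None = None
        hidden_states: tuple[object, ...] | None = None

    print(TinyOutput())  # TinyOutput(logits=None, hidden_states=None)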

View File

@@ -19,7 +19,7 @@ from __future__ import annotations
 import collections.abc
 import math
 from dataclasses import dataclass
-from typing import Any, Optional, Union
+from typing import Any

 import numpy as np
 import tensorflow as tf
@@ -79,7 +79,7 @@ LARGE_NEGATIVE = -1e8
 # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
-def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
+def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
     """
     Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
     """
@@ -253,11 +253,11 @@ class TFGroupViTModelOutput(ModelOutput):
     """

     loss: tf.Tensor | None = None
-    logits_per_image: Optional[tf.Tensor] = None
-    logits_per_text: Optional[tf.Tensor] = None
-    segmentation_logits: Optional[tf.Tensor] = None
-    text_embeds: Optional[tf.Tensor] = None
-    image_embeds: Optional[tf.Tensor] = None
+    logits_per_image: tf.Tensor | None = None
+    logits_per_text: tf.Tensor | None = None
+    segmentation_logits: tf.Tensor | None = None
+    text_embeds: tf.Tensor | None = None
+    image_embeds: tf.Tensor | None = None
     text_model_output: TFBaseModelOutputWithPooling = None
     vision_model_output: TFBaseModelOutputWithPooling = None

 @dataclass
@@ -646,9 +646,9 @@ class TFGroupViTTextEmbeddings(keras.layers.Layer):
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
     ) -> tf.Tensor:
         """
         Applies embedding based on inputs tensor.
@@ -809,9 +809,9 @@ class TFGroupViTMLP(keras.layers.Layer):
     def __init__(
         self,
         config: GroupViTVisionConfig,
-        hidden_size: Optional[int] = None,
-        intermediate_size: Optional[int] = None,
-        output_size: Optional[int] = None,
+        hidden_size: int | None = None,
+        intermediate_size: int | None = None,
+        output_size: int | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -898,10 +898,10 @@ class TFGroupViTAttention(keras.layers.Layer):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[tf.Tensor] = None,
-        causal_attention_mask: Optional[tf.Tensor] = None,
-        output_attentions: Optional[bool] = None,
-        encoder_hidden_states: Optional[tf.Tensor] = None,
+        attention_mask: tf.Tensor | None = None,
+        causal_attention_mask: tf.Tensor | None = None,
+        output_attentions: bool | None = None,
+        encoder_hidden_states: tf.Tensor | None = None,
         training: bool = False,
     ) -> tuple[tf.Tensor]:
         """Input shape: Batch x Time x Channel"""
@@ -1060,7 +1060,7 @@ class TFGroupViTTextEncoder(keras.layers.Layer):
         output_hidden_states: bool,
         return_dict: bool,
         training: bool = False,
-    ) -> Union[tuple, TFBaseModelOutput]:
+    ) -> tuple | TFBaseModelOutput:
         encoder_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
@@ -1121,7 +1121,7 @@ class TFGroupViTVisionEncoder(keras.layers.Layer):
         output_attentions: bool,
         return_dict: bool,
         training: bool = False,
-    ) -> Union[tuple, TFBaseModelOutput]:
+    ) -> tuple | TFBaseModelOutput:
         all_hidden_states = () if output_hidden_states else None
         all_groupings = () if output_attentions else None
@@ -1180,7 +1180,7 @@ class TFGroupViTTextTransformer(keras.layers.Layer):
         output_hidden_states: bool,
         return_dict: bool,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         input_shape = shape_list(input_ids)

         embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids)
@@ -1292,7 +1292,7 @@ class TFGroupViTVisionTransformer(keras.layers.Layer):
         output_hidden_states: bool,
         return_dict: bool,
         training: bool = False,
-    ) -> Union[tuple, TFBaseModelOutputWithPooling]:
+    ) -> tuple | TFBaseModelOutputWithPooling:
         embedding_output = self.embeddings(pixel_values)

         encoder_outputs = self.encoder(
@@ -1356,11 +1356,11 @@ class TFGroupViTTextMainLayer(keras.layers.Layer):
         input_ids: TFModelInputType | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         if input_ids is None:
             raise ValueError("You have to specify input_ids")
@@ -1407,11 +1407,11 @@ class TFGroupViTVisionMainLayer(keras.layers.Layer):
     def call(
         self,
         pixel_values: TFModelInputType | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         if pixel_values is None:
             raise ValueError("You have to specify pixel_values")
@@ -1518,9 +1518,9 @@ class TFGroupViTMainLayer(keras.layers.Layer):
         input_ids: TFModelInputType | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
     ) -> tf.Tensor:
         if input_ids is None:
@@ -1552,9 +1552,9 @@ class TFGroupViTMainLayer(keras.layers.Layer):
     def get_image_features(
         self,
         pixel_values: TFModelInputType | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
     ) -> tf.Tensor:
         if pixel_values is None:
@@ -1582,13 +1582,13 @@ class TFGroupViTMainLayer(keras.layers.Layer):
         pixel_values: TFModelInputType | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
-        return_loss: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        output_segmentation: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        return_loss: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        output_segmentation: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFGroupViTModelOutput, tuple[tf.Tensor]]:
+    ) -> TFGroupViTModelOutput | tuple[tf.Tensor]:
         if input_ids is None:
             raise ValueError("You have to specify either input_ids")
         if pixel_values is None:
@@ -1867,11 +1867,11 @@ class TFGroupViTTextModel(TFGroupViTPreTrainedModel):
         input_ids: TFModelInputType | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         r"""
         Returns:
@@ -1926,11 +1926,11 @@ class TFGroupViTVisionModel(TFGroupViTPreTrainedModel):
     def call(
         self,
         pixel_values: TFModelInputType | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
         r"""
         Returns:
@@ -1989,9 +1989,9 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel):
         input_ids: TFModelInputType | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
     ) -> tf.Tensor:
         r"""
@@ -2028,9 +2028,9 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel):
     def get_image_features(
         self,
         pixel_values: TFModelInputType | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
     ) -> tf.Tensor:
         r"""
@@ -2075,13 +2075,13 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel):
         pixel_values: TFModelInputType | None = None,
         attention_mask: np.ndarray | tf.Tensor | None = None,
         position_ids: np.ndarray | tf.Tensor | None = None,
-        return_loss: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        output_segmentation: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        return_loss: bool | None = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        output_segmentation: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFGroupViTModelOutput, tuple[tf.Tensor]]:
+    ) -> TFGroupViTModelOutput | tuple[tf.Tensor]:
         r"""
         Returns:
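Where the old `typing` imports survive (as with `Any` above), the two union spellings remain interchangeable: on Python 3.10+ they compare equal, so signatures like the rewritten `_expand_mask(mask, tgt_len: int | None = None)` are a pure notation change, not a semantic one. A quick self-contained check:

    import typing

    assert int | None == typing.Optional[int]
    assert int | str == typing.Union[int, str]
    print("PEP 604 unions match their typing equivalents")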

View File

@@ -17,7 +17,7 @@
 from __future__ import annotations

 import warnings
-from typing import Any, Optional, Union
+from typing import Any

 import numpy as np
 import tensorflow as tf
@@ -152,7 +152,7 @@ def _compute_mask_indices(
 # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
-def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
+def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
     """
     Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
     """
@@ -701,7 +701,7 @@ class TFHubertAttention(keras.layers.Layer):
         past_key_value: tuple[tuple[tf.Tensor]] | None = None,
         attention_mask: tf.Tensor | None = None,
         layer_head_mask: tf.Tensor | None = None,
-        training: Optional[bool] = False,
+        training: bool | None = False,
     ) -> tuple[tf.Tensor, tf.Tensor | None]:
         """Input shape: Batch x Time x Channel"""
@@ -896,7 +896,7 @@ class TFHubertEncoderLayer(keras.layers.Layer):
         self,
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = False,
+        output_attentions: bool | None = False,
         training: bool = False,
     ) -> tuple[tf.Tensor]:
         attn_residual = hidden_states
@@ -956,7 +956,7 @@ class TFHubertEncoderLayerStableLayerNorm(keras.layers.Layer):
         self,
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = False,
+        output_attentions: bool | None = False,
         training: bool = False,
     ) -> tuple[tf.Tensor]:
         attn_residual = hidden_states
@@ -1007,11 +1007,11 @@ class TFHubertEncoder(keras.layers.Layer):
         self,
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = False,
-        output_hidden_states: Optional[bool] = False,
-        return_dict: Optional[bool] = True,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+        output_attentions: bool | None = False,
+        output_hidden_states: bool | None = False,
+        return_dict: bool | None = True,
+        training: bool | None = False,
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -1090,11 +1090,11 @@ class TFHubertEncoderStableLayerNorm(keras.layers.Layer):
         self,
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = False,
-        output_hidden_states: Optional[bool] = False,
-        return_dict: Optional[bool] = True,
-        training: Optional[bool] = False,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+        output_attentions: bool | None = False,
+        output_hidden_states: bool | None = False,
+        return_dict: bool | None = True,
+        training: bool | None = False,
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -1260,7 +1260,7 @@ class TFHubertMainLayer(keras.layers.Layer):
         inputs_embeds: tf.Tensor | None = None,
         output_attentions: tf.Tensor | None = None,
         output_hidden_states: tf.Tensor | None = None,
-        return_dict: Optional[bool] = None,
+        return_dict: bool | None = None,
         training: bool = False,
         **kwargs: Any,
     ):
@@ -1445,11 +1445,11 @@ class TFHubertModel(TFHubertPreTrainedModel):
         position_ids: tf.Tensor | None = None,
         head_mask: tf.Tensor | None = None,
         inputs_embeds: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
+        output_attentions: bool | None = None,
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
         training: bool = False,
-    ) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
+    ) -> TFBaseModelOutput | tuple[tf.Tensor]:
         """
         Returns:
@@ -1551,12 +1551,12 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
         position_ids: tf.Tensor | None = None,
         head_mask: tf.Tensor | None = None,
         inputs_embeds: tf.Tensor | None = None,
-        output_attentions: Optional[bool] = None,
+        output_attentions: bool | None = None,
         labels: tf.Tensor | None = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-        training: Optional[bool] = False,
-    ) -> Union[TFCausalLMOutput, tuple[tf.Tensor]]:
+        output_hidden_states: bool | None = None,
+        return_dict: bool | None = None,
+        training: bool | None = False,
+    ) -> TFCausalLMOutput | tuple[tf.Tensor]:
         r"""
         labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
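Several of these parameters end up as `bool | None = False`: the default is a real boolean, but `None` stays in the union because it is a meaningful third state (defer to the model config). A hedged sketch of that tri-state contract, using a hypothetical `resolve_flag` helper rather than anything from this diff:

    from __future__ import annotations

    def resolve_flag(output_attentions: bool | None = False, config_default: bool = True) -> bool:
        # None means "use the config default"; True/False are explicit overrides.
        return config_default if output_attentions is None else output_attentions

    print(resolve_flag())            # False (explicit default)
    print(resolve_flag(None))        # True  (falls back to config_default)
    print(resolve_flag(True))        # True  (explicit override)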

View File

@@ -22,7 +22,6 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import Optional, Union

 import tensorflow as tf
@@ -91,11 +90,11 @@ class TFIdeficsBaseModelOutputWithPast(ModelOutput):
         image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver
     """

-    last_hidden_state: Optional[tf.Tensor] = None
-    past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None
-    hidden_states: Optional[tuple[tf.Tensor]] = None
-    attentions: Optional[tuple[tf.Tensor]] = None
-    image_hidden_states: Optional[tuple[tf.Tensor]] = None
+    last_hidden_state: tf.Tensor | None = None
+    past_key_values: tuple[tuple[tf.Tensor]] | None = None
+    hidden_states: tuple[tf.Tensor] | None = None
+    attentions: tuple[tf.Tensor] | None = None
+    image_hidden_states: tuple[tf.Tensor] | None = None

 @dataclass
@@ -132,12 +131,12 @@ class TFIdeficsCausalLMOutputWithPast(ModelOutput):
         image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver
     """

-    loss: Optional[tf.Tensor] = None
-    logits: Optional[tf.Tensor] = None
-    past_key_values: Optional[list[tf.Tensor]] = None
-    hidden_states: Optional[tuple[tf.Tensor]] = None
-    attentions: Optional[tuple[tf.Tensor]] = None
-    image_hidden_states: Optional[tuple[tf.Tensor]] = None
+    loss: tf.Tensor | None = None
+    logits: tf.Tensor | None = None
+    past_key_values: list[tf.Tensor] | None = None
+    hidden_states: tuple[tf.Tensor] | None = None
+    attentions: tuple[tf.Tensor] | None = None
+    image_hidden_states: tuple[tf.Tensor] | None = None

 def expand_inputs_for_generation(
@@ -278,7 +277,7 @@ class TFIdeficsDecoupledEmbedding(tf.keras.layers.Embedding):
         num_embeddings,
         num_additional_embeddings,
         embedding_dim,
-        partially_freeze: Optional[bool] = False,
+        partially_freeze: bool | None = False,
         dtype=None,
         **kwargs,
     ) -> None:
@@ -658,13 +657,13 @@ class TFIdeficsAttention(tf.keras.layers.Layer):
     def call(
         self,
         hidden_states: tf.Tensor,
-        key_value_states: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        past_key_value: Optional[tuple[tf.Tensor]] = None,
+        key_value_states: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        past_key_value: tuple[tf.Tensor] | None = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-    ) -> tuple[tf.Tensor, Optional[tf.Tensor], Optional[tuple[tf.Tensor]]]:
+    ) -> tuple[tf.Tensor, tf.Tensor | None, tuple[tf.Tensor] | None]:
         # if key_value_states are provided this layer is used as a cross-attention layer
         is_cross_attention = self.is_cross_attention or key_value_states is not None
@@ -791,13 +790,13 @@ class TFIdeficsDecoderLayer(tf.keras.layers.Layer):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        past_key_value: Optional[tuple[tf.Tensor]] = None,
-        output_attentions: Optional[bool] = False,
-        use_cache: Optional[bool] = False,
+        attention_mask: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        past_key_value: tuple[tf.Tensor] | None = None,
+        output_attentions: bool | None = False,
+        use_cache: bool | None = False,
         training=False,
-    ) -> tuple[tf.Tensor, Optional[tuple[tf.Tensor, tf.Tensor]]]:
+    ) -> tuple[tf.Tensor, tuple[tf.Tensor, tf.Tensor] | None]:
         """
         Args:
             hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@@ -979,14 +978,14 @@ class TFIdeficsGatedCrossAttentionLayer(tf.keras.layers.Layer):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[tf.Tensor] = None,
-        image_hidden_states: Optional[tf.Tensor] = None,
-        image_attention_mask: Optional[tf.Tensor] = None,
-        cross_attention_gate: Optional[tf.Tensor] = None,
-        output_attentions: Optional[bool] = False,
-        use_cache: Optional[bool] = False,
-        past_key_value: Optional[tuple[tf.Tensor]] = None,
-    ) -> tuple[tf.Tensor, Optional[tuple[tf.Tensor, tf.Tensor]]]:
+        attention_mask: tf.Tensor | None = None,
+        image_hidden_states: tf.Tensor | None = None,
+        image_attention_mask: tf.Tensor | None = None,
+        cross_attention_gate: tf.Tensor | None = None,
+        output_attentions: bool | None = False,
+        use_cache: bool | None = False,
+        past_key_value: tuple[tf.Tensor] | None = None,
+    ) -> tuple[tf.Tensor, tuple[tf.Tensor, tf.Tensor] | None]:
         """
         Args:
             hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@ -1255,21 +1254,21 @@ class TFIdeficsMainLayer(tf.keras.layers.Layer):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
past_key_values: Optional[list[tf.Tensor]] = None, past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
pixel_values: Optional[tf.Tensor] = None, pixel_values: tf.Tensor | None = None,
image_encoder_embeddings: Optional[tf.Tensor] = None, image_encoder_embeddings: tf.Tensor | None = None,
perceiver_embeddings: Optional[tf.Tensor] = None, perceiver_embeddings: tf.Tensor | None = None,
image_attention_mask: Optional[tf.Tensor] = None, image_attention_mask: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
interpolate_pos_encoding: Optional[bool] = False, interpolate_pos_encoding: bool | None = False,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = None, training: bool | None = None,
) -> Union[TFIdeficsBaseModelOutputWithPast, tuple[tf.Tensor]]: ) -> TFIdeficsBaseModelOutputWithPast | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -1554,21 +1553,21 @@ class TFIdeficsModel(TFIdeficsPreTrainedModel):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
past_key_values: Optional[list[tf.Tensor]] = None, past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
pixel_values: Optional[tf.Tensor] = None, pixel_values: tf.Tensor | None = None,
image_encoder_embeddings: Optional[tf.Tensor] = None, image_encoder_embeddings: tf.Tensor | None = None,
perceiver_embeddings: Optional[tf.Tensor] = None, perceiver_embeddings: tf.Tensor | None = None,
image_attention_mask: Optional[tf.Tensor] = None, image_attention_mask: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
interpolate_pos_encoding: Optional[bool] = False, interpolate_pos_encoding: bool | None = False,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = None, training: bool | None = None,
) -> Union[TFIdeficsBaseModelOutputWithPast, tuple[tf.Tensor]]: ) -> TFIdeficsBaseModelOutputWithPast | tuple[tf.Tensor]:
outputs = self.model( outputs = self.model(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1659,22 +1658,22 @@ class TFIdeficsForVisionText2Text(TFPreTrainedModel, TFCausalLanguageModelingLos
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
past_key_values: Optional[list[tf.Tensor]] = None, past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
pixel_values: Optional[tf.Tensor] = None, pixel_values: tf.Tensor | None = None,
image_encoder_embeddings: Optional[tf.Tensor] = None, image_encoder_embeddings: tf.Tensor | None = None,
perceiver_embeddings: Optional[tf.Tensor] = None, perceiver_embeddings: tf.Tensor | None = None,
image_attention_mask: Optional[tf.Tensor] = None, image_attention_mask: tf.Tensor | None = None,
labels: Optional[tf.Tensor] = None, labels: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
interpolate_pos_encoding: Optional[bool] = False, interpolate_pos_encoding: bool | None = False,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training=False, training=False,
) -> Union[TFIdeficsCausalLMOutputWithPast, tuple[tf.Tensor]]: ) -> TFIdeficsCausalLMOutputWithPast | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
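The `@dataclass` output containers get the same field-level substitution. A minimal sketch, using plain Python types in place of `tf.Tensor` so it runs without TensorFlow (class and field names hypothetical):

from __future__ import annotations

from dataclasses import dataclass

@dataclass
class SketchOutput:
    # Optional[tf.Tensor] = None on each field becomes `tf.Tensor | None = None`.
    loss: float | None = None
    hidden_states: tuple[float, ...] | None = None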
View File
@ -18,7 +18,6 @@ from __future__ import annotations
import math import math
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -127,11 +126,11 @@ class TFLayoutLMEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
bbox: Optional[tf.Tensor] = None, bbox: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
@ -584,12 +583,12 @@ class TFLayoutLMEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None, encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None, encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None, past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool], use_cache: bool | None,
output_attentions: bool, output_attentions: bool,
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -825,11 +824,11 @@ class TFLayoutLMMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None: elif input_ids is not None:
@ -1070,11 +1069,11 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1175,12 +1174,12 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1304,12 +1303,12 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1440,12 +1439,12 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1572,13 +1571,13 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
View File
@ -18,7 +18,6 @@ from __future__ import annotations
import collections import collections
import math import math
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -231,7 +230,7 @@ class TFLayoutLMv3TextEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: tf.Tensor | None = None, input_ids: tf.Tensor | None = None,
bbox: Optional[tf.Tensor] = None, bbox: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
@ -341,7 +340,7 @@ class TFLayoutLMv3SelfAttention(keras.layers.Layer):
x = tf.reshape(x, new_shape) x = tf.reshape(x, new_shape)
return tf.transpose(x, perm=[0, 2, 1, 3]) # batch_size, num_heads, seq_length, attention_head_size return tf.transpose(x, perm=[0, 2, 1, 3]) # batch_size, num_heads, seq_length, attention_head_size
def cogview_attention(self, attention_scores: tf.Tensor, alpha: Union[float, int] = 32): def cogview_attention(self, attention_scores: tf.Tensor, alpha: float | int = 32):
""" """
https://huggingface.co/papers/2105.13290 Section 2.4 Stabilization of training: Precision Bottleneck Relaxation https://huggingface.co/papers/2105.13290 Section 2.4 Stabilization of training: Precision Bottleneck Relaxation
(PB-Relax). A replacement of the original keras.layers.Softmax(axis=-1)(attention_scores). Seems the new (PB-Relax). A replacement of the original keras.layers.Softmax(axis=-1)(attention_scores). Seems the new
@ -363,7 +362,7 @@ class TFLayoutLMv3SelfAttention(keras.layers.Layer):
rel_pos: tf.Tensor | None = None, rel_pos: tf.Tensor | None = None,
rel_2d_pos: tf.Tensor | None = None, rel_2d_pos: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], tuple[tf.Tensor, tf.Tensor]]: ) -> tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]:
key_layer = self.transpose_for_scores(self.key(hidden_states)) key_layer = self.transpose_for_scores(self.key(hidden_states))
value_layer = self.transpose_for_scores(self.value(hidden_states)) value_layer = self.transpose_for_scores(self.value(hidden_states))
query_layer = self.transpose_for_scores(self.query(hidden_states)) query_layer = self.transpose_for_scores(self.query(hidden_states))
@ -468,7 +467,7 @@ class TFLayoutLMv3Attention(keras.layers.Layer):
rel_pos: tf.Tensor | None = None, rel_pos: tf.Tensor | None = None,
rel_2d_pos: tf.Tensor | None = None, rel_2d_pos: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], tuple[tf.Tensor, tf.Tensor]]: ) -> tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]:
self_outputs = self.self_attention( self_outputs = self.self_attention(
hidden_states, hidden_states,
attention_mask, attention_mask,
@ -571,7 +570,7 @@ class TFLayoutLMv3Layer(keras.layers.Layer):
rel_pos: tf.Tensor | None = None, rel_pos: tf.Tensor | None = None,
rel_2d_pos: tf.Tensor | None = None, rel_2d_pos: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], tuple[tf.Tensor, tf.Tensor]]: ) -> tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]:
self_attention_outputs = self.attention( self_attention_outputs = self.attention(
hidden_states, hidden_states,
attention_mask, attention_mask,
@ -711,12 +710,7 @@ class TFLayoutLMv3Encoder(keras.layers.Layer):
return_dict: bool = True, return_dict: bool = True,
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[ ) -> TFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
TFBaseModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None all_self_attentions = () if output_attentions else None
@ -926,7 +920,7 @@ class TFLayoutLMv3MainLayer(keras.layers.Layer):
return extended_attention_mask return extended_attention_mask
def get_head_mask(self, head_mask: tf.Tensor | None) -> Union[tf.Tensor, list[tf.Tensor | None]]: def get_head_mask(self, head_mask: tf.Tensor | None) -> tf.Tensor | list[tf.Tensor | None]:
if head_mask is None: if head_mask is None:
return [None] * self.config.num_hidden_layers return [None] * self.config.num_hidden_layers
@ -962,16 +956,11 @@ class TFLayoutLMv3MainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[ ) -> TFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
TFBaseModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
]:
# This method can be called with a variety of modalities: # This method can be called with a variety of modalities:
# 1. text + layout # 1. text + layout
# 2. text + layout + image # 2. text + layout + image
@ -1274,16 +1263,11 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[ ) -> TFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
TFBaseModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
]:
r""" r"""
Returns: Returns:
@ -1413,19 +1397,19 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
bbox: tf.Tensor | None = None, bbox: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[ ) -> (
TFSequenceClassifierOutput, TFSequenceClassifierOutput
tuple[tf.Tensor], | tuple[tf.Tensor]
tuple[tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor]
tuple[tf.Tensor, tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor, tf.Tensor]
tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
]: ):
""" """
Returns: Returns:
@ -1539,18 +1523,18 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[ ) -> (
TFTokenClassifierOutput, TFTokenClassifierOutput
tuple[tf.Tensor], | tuple[tf.Tensor]
tuple[tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor]
tuple[tf.Tensor, tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor, tf.Tensor]
tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
]: ):
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1668,19 +1652,19 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
start_positions: tf.Tensor | None = None, start_positions: tf.Tensor | None = None,
end_positions: tf.Tensor | None = None, end_positions: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
bbox: tf.Tensor | None = None, bbox: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[ ) -> (
TFQuestionAnsweringModelOutput, TFQuestionAnsweringModelOutput
tuple[tf.Tensor], | tuple[tf.Tensor]
tuple[tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor]
tuple[tf.Tensor, tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor, tf.Tensor]
tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor], | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
]: ):
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
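Two spellings coexist for the rewritten return unions: some collapse onto a single line even at four members, while the five-member unions in the classification heads above keep one member per line inside parentheses. A sketch of both, with hypothetical names:

from __future__ import annotations

def short(x: int | None = None) -> int | tuple[int]:
    return x if x is not None else (0,)

def long(x: int) -> (
    int
    | tuple[int]
    | tuple[int, int]
):
    return x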
View File
@ -18,7 +18,6 @@ from __future__ import annotations
import random import random
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -100,7 +99,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -1470,7 +1469,7 @@ class TFLEDEncoderBaseModelOutput(ModelOutput):
in the sequence. in the sequence.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -1533,7 +1532,7 @@ class TFLEDSeq2SeqModelOutput(ModelOutput):
in the sequence. in the sequence.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor, ...] | None = None decoder_hidden_states: tuple[tf.Tensor, ...] | None = None
decoder_attentions: tuple[tf.Tensor, ...] | None = None decoder_attentions: tuple[tf.Tensor, ...] | None = None
@ -1600,7 +1599,7 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor, ...] | None = None decoder_hidden_states: tuple[tf.Tensor, ...] | None = None
decoder_attentions: tuple[tf.Tensor, ...] | None = None decoder_attentions: tuple[tf.Tensor, ...] | None = None
@ -1731,7 +1730,7 @@ class TFLEDEncoder(keras.layers.Layer):
config: LEDConfig config: LEDConfig
""" """
def __init__(self, config: LEDConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: LEDConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.dropout = keras.layers.Dropout(config.dropout) self.dropout = keras.layers.Dropout(config.dropout)
@ -2001,7 +2000,7 @@ class TFLEDDecoder(keras.layers.Layer):
embed_tokens: output embedding embed_tokens: output embedding
""" """
def __init__(self, config: LEDConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: LEDConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.padding_idx = config.pad_token_id self.padding_idx = config.pad_token_id
@ -2253,7 +2252,7 @@ class TFLEDMainLayer(keras.layers.Layer):
decoder_attention_mask=None, decoder_attention_mask=None,
head_mask=None, head_mask=None,
decoder_head_mask=None, decoder_head_mask=None,
encoder_outputs: Optional[Union[tuple, TFLEDEncoderBaseModelOutput]] = None, encoder_outputs: tuple | TFLEDEncoderBaseModelOutput | None = None,
global_attention_mask=None, global_attention_mask=None,
past_key_values=None, past_key_values=None,
inputs_embeds=None, inputs_embeds=None,
@ -2509,7 +2508,7 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: TFLEDEncoderBaseModelOutput | None = None, encoder_outputs: TFLEDEncoderBaseModelOutput | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None, global_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: tuple[tuple[Union[np.ndarray, tf.Tensor]]] | None = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: bool | None = None, use_cache: bool | None = None,
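Unions also nest inside generic subscripts exactly as `Union` did, as in `past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None` above. A runnable sketch with builtin types (names hypothetical):

from __future__ import annotations

def read_cache(past: tuple[tuple[bytes | str]] | None = None) -> int:
    # Mirrors the nested union-in-subscript shape of past_key_values above.
    return 0 if past is None else len(past)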
View File
@ -18,7 +18,6 @@ from __future__ import annotations
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -93,7 +92,7 @@ class TFLongformerBaseModelOutput(ModelOutput):
in the sequence. in the sequence.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -140,8 +139,8 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput):
in the sequence. in the sequence.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
pooler_output: Optional[tf.Tensor] = None pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -187,7 +186,7 @@ class TFLongformerMaskedLMOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -235,8 +234,8 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
start_logits: Optional[tf.Tensor] = None start_logits: tf.Tensor | None = None
end_logits: Optional[tf.Tensor] = None end_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -282,7 +281,7 @@ class TFLongformerSequenceClassifierOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -330,7 +329,7 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -376,7 +375,7 @@ class TFLongformerTokenClassifierOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None global_attentions: tuple[tf.Tensor, ...] | None = None
@ -2138,11 +2137,11 @@ class TFLongformerModel(TFLongformerPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFLongformerBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFLongformerBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.longformer( outputs = self.longformer(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -2208,12 +2207,12 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFLongformerMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFLongformerMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -2304,13 +2303,13 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFLongformerQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFLongformerQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -2477,12 +2476,12 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None, global_attention_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFLongformerSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFLongformerSequenceClassifierOutput | tuple[tf.Tensor]:
if input_ids is not None and not isinstance(input_ids, tf.Tensor): if input_ids is not None and not isinstance(input_ids, tf.Tensor):
input_ids = tf.convert_to_tensor(input_ids, dtype=tf.int64) input_ids = tf.convert_to_tensor(input_ids, dtype=tf.int64)
elif input_ids is not None: elif input_ids is not None:
@ -2603,12 +2602,12 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None, global_attention_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFLongformerMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFLongformerMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -2721,12 +2720,12 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenCla
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None, global_attention_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: Optional[Union[np.array, tf.Tensor]] = None, labels: np.array | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFLongformerTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFLongformerTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
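The Longformer hunk above also shows `Optional[Union[A, B]]` flattening into a single chain, `A | B | None`. A sketch (names hypothetical):

from __future__ import annotations

def score(labels: list[int] | tuple[int, ...] | None = None) -> int:
    return 0 if labels is None else len(labels)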
View File
@ -20,7 +20,6 @@ from __future__ import annotations
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -1109,11 +1108,11 @@ class TFLxmertModel(TFLxmertPreTrainedModel):
visual_attention_mask: np.ndarray | tf.Tensor | None = None, visual_attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFLxmertModelOutput]: ) -> tuple | TFLxmertModelOutput:
outputs = self.lxmert( outputs = self.lxmert(
input_ids, input_ids,
visual_feats, visual_feats,
View File
@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import random import random
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -103,7 +102,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -216,7 +215,7 @@ class TFMarianAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -367,7 +366,7 @@ class TFMarianEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: np.ndarray | tf.Tensor | None, attention_mask: np.ndarray | tf.Tensor | None,
layer_head_mask: tf.Tensor | None, layer_head_mask: tf.Tensor | None,
training: Optional[bool] = False, training: bool | None = False,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
Args: Args:
@ -461,8 +460,8 @@ class TFMarianDecoderLayer(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None, cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]: ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
""" """
Args: Args:
@ -723,7 +722,7 @@ class TFMarianEncoder(keras.layers.Layer):
config: MarianConfig config: MarianConfig
""" """
def __init__(self, config: MarianConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: MarianConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.dropout = keras.layers.Dropout(config.dropout) self.dropout = keras.layers.Dropout(config.dropout)
@ -753,9 +752,9 @@ class TFMarianEncoder(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
): ):
""" """
@ -890,7 +889,7 @@ class TFMarianDecoder(keras.layers.Layer):
embed_tokens: output embedding embed_tokens: output embedding
""" """
def __init__(self, config: MarianConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: MarianConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.padding_idx = config.pad_token_id self.padding_idx = config.pad_token_id
@ -924,10 +923,10 @@ class TFMarianDecoder(keras.layers.Layer):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
): ):
r""" r"""
@ -1154,14 +1153,14 @@ class TFMarianMainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None, decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None, encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs, **kwargs,
): ):
View File
@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import random import random
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -102,7 +101,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -129,7 +128,7 @@ class TFMBartLearnedPositionalEmbedding(keras.layers.Embedding):
def call( def call(
self, self,
input_shape: Optional[tf.TensorShape] = None, input_shape: tf.TensorShape | None = None,
past_key_values_length: int = 0, past_key_values_length: int = 0,
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
): ):
@ -185,7 +184,7 @@ class TFMBartAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -335,7 +334,7 @@ class TFMBartEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
layer_head_mask: tf.Tensor, layer_head_mask: tf.Tensor,
training: Optional[bool] = False, training: bool | None = False,
): ):
""" """
Args: Args:
@ -429,7 +428,7 @@ class TFMBartDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None, cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None, past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]: ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
""" """
Args: Args:
@ -713,7 +712,7 @@ class TFMBartEncoder(keras.layers.Layer):
config: MBartConfig config: MBartConfig
""" """
def __init__(self, config: MBartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: MBartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.dropout = keras.layers.Dropout(config.dropout) self.dropout = keras.layers.Dropout(config.dropout)
@ -746,11 +745,11 @@ class TFMBartEncoder(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
""" """
Args: Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`): input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -892,7 +891,7 @@ class TFMBartDecoder(keras.layers.Layer):
embed_tokens: output embedding embed_tokens: output embedding
""" """
def __init__(self, config: MBartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: MBartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.padding_idx = config.pad_token_id self.padding_idx = config.pad_token_id
@ -928,14 +927,12 @@ class TFMBartDecoder(keras.layers.Layer):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[ ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
]:
r""" r"""
Args: Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`): input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -1169,17 +1166,17 @@ class TFMBartMainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None, decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None, encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
) -> Union[TFSeq2SeqModelOutput, tf.Tensor]: ) -> TFSeq2SeqModelOutput | tf.Tensor:
if decoder_input_ids is None and decoder_inputs_embeds is None: if decoder_input_ids is None and decoder_inputs_embeds is None:
use_cache = False use_cache = False
@ -1293,17 +1290,17 @@ class TFMBartModel(TFMBartPreTrainedModel):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None, decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None, encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
) -> Union[TFSeq2SeqModelOutput, tuple[tf.Tensor]]: ) -> TFSeq2SeqModelOutput | tuple[tf.Tensor]:
outputs = self.model( outputs = self.model(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1429,17 +1426,17 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None, decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None, encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]: ) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
""" """
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
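Every hunk above applies the same mechanical rewrite from PEP 604: `Optional[X]` becomes `X | None` and `Union[A, B]` becomes `A | B`, so the `Optional`/`Union` imports can be dropped. A minimal standalone sketch of the pattern (illustrative only, not part of the commit):

from __future__ import annotations  # lets `X | None` appear in annotations on Python < 3.10

# Old spelling, needing `from typing import Optional, Union`:
#     def call(self, timeout: Optional[float] = None, mask: Union[list, str] = "auto"): ...
# New spelling, with no typing imports:
def call(timeout: float | None = None, mask: list | str = "auto") -> tuple[int, int] | None:
    return None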
View File
@ -19,7 +19,6 @@ from __future__ import annotations
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -1063,8 +1062,8 @@ class TFMobileBertForPreTrainingOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
prediction_logits: Optional[tf.Tensor] = None prediction_logits: tf.Tensor | None = None
seq_relationship_logits: Optional[tf.Tensor] = None seq_relationship_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -1191,11 +1190,11 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutputWithPooling]: ) -> tuple | TFBaseModelOutputWithPooling:
outputs = self.mobilebert( outputs = self.mobilebert(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1252,13 +1251,13 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel, TFMobileBertPreTra
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
next_sentence_label: np.ndarray | tf.Tensor | None = None, next_sentence_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFMobileBertForPreTrainingOutput]: ) -> tuple | TFMobileBertForPreTrainingOutput:
r""" r"""
Return: Return:
@ -1369,12 +1368,12 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFMaskedLMOutput]: ) -> tuple | TFMaskedLMOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1471,12 +1470,12 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextS
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
next_sentence_label: np.ndarray | tf.Tensor | None = None, next_sentence_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFNextSentencePredictorOutput]: ) -> tuple | TFNextSentencePredictorOutput:
r""" r"""
Return: Return:
@ -1587,12 +1586,12 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSequenceClassifierOutput]: ) -> tuple | TFSequenceClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1687,13 +1686,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFQuestionAnsweringModelOutput]: ) -> tuple | TFQuestionAnsweringModelOutput:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1796,12 +1795,12 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFMultipleChoiceModelOutput]: ) -> tuple | TFMultipleChoiceModelOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1914,12 +1913,12 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFTokenClassifierOutput]: ) -> tuple | TFTokenClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
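The rewrite also reaches dataclass-style output containers such as `TFMobileBertForPreTrainingOutput` above, where `Optional[tf.Tensor]` fields become `tf.Tensor | None`. A self-contained sketch of the same mechanics, with builtin placeholder types standing in for `tf.Tensor`:

from __future__ import annotations
from dataclasses import dataclass

@dataclass
class PreTrainingOutput:
    loss: float | None = None                     # was: Optional[float]
    prediction_logits: list[float] | None = None  # was: Optional[list[float]]

print(PreTrainingOutput())  # PreTrainingOutput(loss=None, prediction_logits=None)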
View File
@ -18,8 +18,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
from ...activations_tf import get_tf_activation from ...activations_tf import get_tf_activation
@ -61,7 +59,7 @@ _IMAGE_CLASS_CHECKPOINT = "apple/mobilevit-small"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat" _IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"
def make_divisible(value: int, divisor: int = 8, min_value: Optional[int] = None) -> int: def make_divisible(value: int, divisor: int = 8, min_value: int | None = None) -> int:
""" """
Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the
original TensorFlow repo. It can be seen here: original TensorFlow repo. It can be seen here:
@ -88,7 +86,7 @@ class TFMobileViTConvLayer(keras.layers.Layer):
bias: bool = False, bias: bool = False,
dilation: int = 1, dilation: int = 1,
use_normalization: bool = True, use_normalization: bool = True,
use_activation: Union[bool, str] = True, use_activation: bool | str = True,
**kwargs, **kwargs,
) -> None: ) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
@ -764,7 +762,7 @@ class TFMobileViTEncoder(keras.layers.Layer):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = True, return_dict: bool = True,
training: bool = False, training: bool = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
for i, layer_module in enumerate(self.layers): for i, layer_module in enumerate(self.layers):
@ -830,10 +828,10 @@ class TFMobileViTMainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], TFBaseModelOutputWithPooling]: ) -> tuple[tf.Tensor] | TFBaseModelOutputWithPooling:
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
) )
@ -998,10 +996,10 @@ class TFMobileViTModel(TFMobileViTPreTrainedModel):
def call( def call(
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], TFBaseModelOutputWithPooling]: ) -> tuple[tf.Tensor] | TFBaseModelOutputWithPooling:
output = self.mobilevit(pixel_values, output_hidden_states, return_dict, training=training) output = self.mobilevit(pixel_values, output_hidden_states, return_dict, training=training)
return output return output
@ -1046,11 +1044,11 @@ class TFMobileViTForImageClassification(TFMobileViTPreTrainedModel, TFSequenceCl
def call( def call(
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFImageClassifierOutputWithNoAttention]: ) -> tuple | TFImageClassifierOutputWithNoAttention:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
@ -1287,10 +1285,10 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel):
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFSemanticSegmenterOutputWithNoAttention]: ) -> tuple | TFSemanticSegmenterOutputWithNoAttention:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ..., Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
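`use_activation: Union[bool, str]` above becomes `bool | str`. On Python 3.10+ that expression builds a real runtime object (`types.UnionType`) which `isinstance` accepts directly; a quick check, assuming Python >= 3.10 since these lines are evaluated eagerly:

print(isinstance("swish", bool | str))  # True: PEP 604 unions work with isinstance
print(isinstance(3.14, bool | str))     # False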

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math import math
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -765,15 +764,15 @@ class TFMPNetModel(TFMPNetPreTrainedModel):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None, attention_mask: np.array | tf.Tensor | None = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None, position_ids: np.array | tf.Tensor | None = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None, head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.mpnet( outputs = self.mpnet(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -887,12 +886,12 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1002,16 +1001,16 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None, attention_mask: np.array | tf.Tensor | None = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None, position_ids: np.array | tf.Tensor | None = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None, head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1090,12 +1089,12 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1191,12 +1190,12 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1272,18 +1271,18 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None, attention_mask: np.array | tf.Tensor | None = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None, position_ids: np.array | tf.Tensor | None = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None, head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: tf.Tensor | None = None, start_positions: tf.Tensor | None = None,
end_positions: tf.Tensor | None = None, end_positions: tf.Tensor | None = None,
training: bool = False, training: bool = False,
**kwargs, **kwargs,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
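One subtlety in the MPNet signatures above: `np.array` is NumPy's array function, not a type, so evaluating `np.array | tf.Tensor` would raise a `TypeError`. The spelling is legal only because these modules start with `from __future__ import annotations` (PEP 563), which stores every annotation as an unevaluated string. A self-contained demonstration:

from __future__ import annotations  # annotations become plain strings

def call(attention_mask: not_a_real_type | None = None):  # never evaluated, so no NameError
    return attention_mask

print(call.__annotations__["attention_mask"])  # prints 'not_a_real_type | None'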
View File
@ -18,7 +18,6 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -293,11 +292,11 @@ class TFOpenAIGPTMainLayer(keras.layers.Layer):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None: elif input_ids is not None:
@ -429,8 +428,8 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput):
heads. heads.
""" """
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mc_logits: Optional[tf.Tensor] = None mc_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -557,11 +556,11 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
outputs = self.transformer( outputs = self.transformer(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -620,12 +619,12 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFCausalLMOutput]: ) -> tuple | TFCausalLMOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -708,11 +707,11 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
mc_token_ids: np.ndarray | tf.Tensor | None = None, mc_token_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFOpenAIGPTDoubleHeadsModelOutput]: ) -> tuple | TFOpenAIGPTDoubleHeadsModelOutput:
r""" r"""
mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input): mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) - Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
@ -853,12 +852,12 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSequenceClassifierOutput]: ) -> tuple | TFSequenceClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
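Collapsing `Union[tuple, TFBaseModelOutput]` to `tuple | TFBaseModelOutput` is more than cosmetic shortening: on Python 3.10+ the two spellings compare equal at runtime, so introspection-based tooling sees the same type either way. A quick check (assuming Python >= 3.10):

import typing

assert (tuple | None) == typing.Optional[tuple]
assert (int | str) == typing.Union[int, str]
print("PEP 604 unions compare equal to their typing spellings")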
View File
@ -16,8 +16,6 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -78,7 +76,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -157,7 +155,7 @@ class TFOPTAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -312,10 +310,10 @@ class TFOPTDecoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
training: Optional[bool] = False, training: bool | None = False,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
use_cache: Optional[bool] = False, use_cache: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]: ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
""" """
Args: Args:
@ -578,13 +576,13 @@ class TFOPTDecoder(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
r""" r"""
Args: Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`): input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -780,15 +778,15 @@ class TFOPTMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -860,15 +858,15 @@ class TFOPTModel(TFOPTPreTrainedModel):
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -963,19 +961,19 @@ class TFOPTForCausalLM(TFOPTPreTrainedModel, TFCausalLanguageModelingLoss):
def call( def call(
self, self,
input_ids: TFModelInputType | None = None, input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
) -> Union[TFCausalLMOutputWithPast, tuple[tf.Tensor]]: ) -> TFCausalLMOutputWithPast | tuple[tf.Tensor]:
r""" r"""
Args: Args:
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
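The OPT hunks show the most deeply nested case: `Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]]` flattens to `tuple[tuple[np.ndarray | tf.Tensor]] | None`. No parentheses are needed because subscription binds tighter than `|`, and the trailing `| None` applies to the whole outer tuple. A runnable analogue with builtin element types standing in for the array types (Python >= 3.10, since the alias is evaluated eagerly):

# Stand-in for `tuple[tuple[np.ndarray | tf.Tensor]] | None` from the diff:
PastKeyValues = tuple[tuple[int | float, ...], ...] | None

def decoder_step(past_key_values: PastKeyValues = None) -> PastKeyValues:
    return past_key_values

print(decoder_step(((1, 2.0),)))  # ((1, 2.0),)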
View File
@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import random import random
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -104,7 +103,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -218,7 +217,7 @@ class TFPegasusAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -369,7 +368,7 @@ class TFPegasusEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor, attention_mask: tf.Tensor,
layer_head_mask: tf.Tensor, layer_head_mask: tf.Tensor,
training: Optional[bool] = False, training: bool | None = False,
): ):
""" """
Args: Args:
@ -464,7 +463,7 @@ class TFPegasusDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None, cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None, past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]: ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
""" """
Args: Args:
@ -726,7 +725,7 @@ class TFPegasusEncoder(keras.layers.Layer):
config: PegasusConfig config: PegasusConfig
""" """
def __init__(self, config: PegasusConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: PegasusConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.dropout = keras.layers.Dropout(config.dropout) self.dropout = keras.layers.Dropout(config.dropout)
@ -757,10 +756,10 @@ class TFPegasusEncoder(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
): ):
""" """
Args: Args:
@ -899,7 +898,7 @@ class TFPegasusDecoder(keras.layers.Layer):
embed_tokens: output embedding embed_tokens: output embedding
""" """
def __init__(self, config: PegasusConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs): def __init__(self, config: PegasusConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.config = config self.config = config
self.padding_idx = config.pad_token_id self.padding_idx = config.pad_token_id
@ -933,12 +932,12 @@ class TFPegasusDecoder(keras.layers.Layer):
encoder_attention_mask: tf.Tensor | None = None, encoder_attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
): ):
r""" r"""
Args: Args:
@ -1169,15 +1168,15 @@ class TFPegasusMainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None, decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None, encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None, past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
): ):
if decoder_input_ids is None and decoder_inputs_embeds is None: if decoder_input_ids is None and decoder_inputs_embeds is None:
@ -1290,17 +1289,17 @@ class TFPegasusModel(TFPegasusPreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None, encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs, **kwargs,
) -> Union[TFSeq2SeqModelOutput, tuple[tf.Tensor]]: ) -> TFSeq2SeqModelOutput | tuple[tf.Tensor]:
outputs = self.model( outputs = self.model(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1426,17 +1425,17 @@ class TFPegasusForConditionalGeneration(TFPegasusPreTrainedModel, TFCausalLangua
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None, encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]: ) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
""" """
labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
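Rewrites of this shape are normally generated mechanically; tools such as pyupgrade or Ruff's pyupgrade-derived rules can perform them, though whether this commit used one is not stated. As a toy illustration only, a regex covering the simple non-nested `Optional[X]` case seen in these signatures (real tools operate on the AST and also handle `Union` and nesting):

import re

sig = "def call(self, training: Optional[bool] = False, return_dict: Optional[bool] = None):"
print(re.sub(r"Optional\[([^\[\]]+)\]", r"\1 | None", sig))
# def call(self, training: bool | None = False, return_dict: bool | None = None):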
View File
@ -19,7 +19,6 @@ from __future__ import annotations
import copy import copy
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -115,7 +114,7 @@ class TFRetrievAugLMMarginOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None past_key_values: list[tf.Tensor] | None = None
doc_scores: tf.Tensor | None = None doc_scores: tf.Tensor | None = None
retrieved_doc_embeds: tf.Tensor | None = None retrieved_doc_embeds: tf.Tensor | None = None
@ -198,7 +197,7 @@ class TFRetrievAugLMOutput(ModelOutput):
average in the self-attention heads. average in the self-attention heads.
""" """
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None past_key_values: list[tf.Tensor] | None = None
doc_scores: tf.Tensor | None = None doc_scores: tf.Tensor | None = None
retrieved_doc_embeds: tf.Tensor | None = None retrieved_doc_embeds: tf.Tensor | None = None
@ -232,8 +231,8 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
@classmethod @classmethod
def from_pretrained_question_encoder_generator( def from_pretrained_question_encoder_generator(
cls, cls,
question_encoder_pretrained_model_name_or_path: Optional[str] = None, question_encoder_pretrained_model_name_or_path: str | None = None,
generator_pretrained_model_name_or_path: Optional[str] = None, generator_pretrained_model_name_or_path: str | None = None,
retriever: RagRetriever = None, retriever: RagRetriever = None,
*model_args, *model_args,
**kwargs, **kwargs,
@ -499,11 +498,11 @@ class TFRagModel(TFRagPreTrainedModel):
def __init__( def __init__(
self, self,
config: Optional[PretrainedConfig] = None, config: PretrainedConfig | None = None,
question_encoder: Optional[TFPreTrainedModel] = None, question_encoder: TFPreTrainedModel | None = None,
generator: Optional[TFPreTrainedModel] = None, generator: TFPreTrainedModel | None = None,
retriever: Optional[RagRetriever] = None, retriever: RagRetriever | None = None,
load_weight_prefix: Optional[str] = None, load_weight_prefix: str | None = None,
**kwargs, **kwargs,
): ):
assert config is not None or (question_encoder is not None and generator is not None), ( assert config is not None or (question_encoder is not None and generator is not None), (
@ -554,7 +553,7 @@ class TFRagModel(TFRagPreTrainedModel):
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
decoder_input_ids: np.ndarray | tf.Tensor | None = None, decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None, decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: tuple[tuple[Union[np.ndarray, tf.Tensor]]] | None = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None, doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None, context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None, context_attention_mask: np.ndarray | tf.Tensor | None = None,
@ -741,10 +740,10 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
def __init__( def __init__(
self, self,
config: Optional[PretrainedConfig] = None, config: PretrainedConfig | None = None,
question_encoder: Optional[TFPreTrainedModel] = None, question_encoder: TFPreTrainedModel | None = None,
generator: Optional[TFPreTrainedModel] = None, generator: TFPreTrainedModel | None = None,
retriever: Optional[RagRetriever] = None, retriever: RagRetriever | None = None,
**kwargs, **kwargs,
): ):
assert config is not None or (question_encoder is not None and generator is not None), ( assert config is not None or (question_encoder is not None and generator is not None), (
@ -859,7 +858,7 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
decoder_input_ids: np.ndarray | tf.Tensor | None = None, decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None, decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: tuple[tuple[Union[np.ndarray, tf.Tensor]]] | None = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None, doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None, context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None, context_attention_mask: np.ndarray | tf.Tensor | None = None,
@ -1321,10 +1320,10 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
def __init__( def __init__(
self, self,
config: Optional[PretrainedConfig] = None, config: PretrainedConfig | None = None,
question_encoder: Optional[TFPreTrainedModel] = None, question_encoder: TFPreTrainedModel | None = None,
generator: Optional[TFPreTrainedModel] = None, generator: TFPreTrainedModel | None = None,
retriever: Optional[RagRetriever] = None, retriever: RagRetriever | None = None,
**kwargs, **kwargs,
): ):
assert config is not None or (question_encoder is not None and generator is not None), ( assert config is not None or (question_encoder is not None and generator is not None), (
@ -1373,22 +1372,22 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
decoder_input_ids: np.ndarray | tf.Tensor | None = None, decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None, decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None, doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None, context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None, context_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
output_retrieved: Optional[bool] = None, output_retrieved: bool | None = None,
n_docs: Optional[int] = None, n_docs: int | None = None,
exclude_bos_score: Optional[bool] = None, exclude_bos_score: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
reduce_loss: Optional[bool] = None, reduce_loss: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs, # needs kwargs for generation **kwargs, # needs kwargs for generation
) -> Union[tuple[tf.Tensor], TFRetrievAugLMMarginOutput]: ) -> tuple[tf.Tensor] | TFRetrievAugLMMarginOutput:
r""" r"""
exclude_bos_score (`bool`, *optional*): exclude_bos_score (`bool`, *optional*):
Only relevant if `labels` is passed. If `True`, the score of the BOS token is disregarded when computing Only relevant if `labels` is passed. If `True`, the score of the BOS token is disregarded when computing
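Every hunk in this file applies the same mechanical rewrite: `Optional[X]` becomes `X | None` and `Union[A, B]` becomes `A | B` (PEP 604), after which the `Optional`/`Union` imports from `typing` can be dropped. A minimal stand-alone sketch of the transform, with hypothetical function names rather than repository code:

from __future__ import annotations  # lets `X | None` parse even below Python 3.10

from typing import Optional, Union, get_type_hints

# Before: typing-module spellings.
def old_style(timeout: Optional[float] = None) -> Union[int, str]:
    return 0

# After: PEP 604 unions; the Optional/Union imports become unnecessary.
def new_style(timeout: float | None = None) -> int | str:
    return 0

# On Python 3.10+ the two spellings evaluate to equal types
# (the assert needs 3.10+, since get_type_hints evaluates `float | None`).
assert get_type_hints(old_style) == get_type_hints(new_style)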
View File
@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import math import math
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -106,10 +105,10 @@ class TFRemBertEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0, past_key_values_length=0,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
@ -550,7 +549,7 @@ class TFRemBertEncoder(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
hidden_states = self.embedding_hidden_mapping_in(inputs=hidden_states) hidden_states = self.embedding_hidden_mapping_in(inputs=hidden_states)
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -766,13 +765,13 @@ class TFRemBertMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
@ -1063,13 +1062,13 @@ class TFRemBertModel(TFRemBertPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1150,12 +1149,12 @@ class TFRemBertForMaskedLM(TFRemBertPreTrainedModel, TFMaskedLanguageModelingLos
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1246,14 +1245,14 @@ class TFRemBertForCausalLM(TFRemBertPreTrainedModel, TFCausalLanguageModelingLos
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]: ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1364,12 +1363,12 @@ class TFRemBertForSequenceClassification(TFRemBertPreTrainedModel, TFSequenceCla
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1449,12 +1448,12 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss)
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1559,12 +1558,12 @@ class TFRemBertForTokenClassification(TFRemBertPreTrainedModel, TFTokenClassific
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1643,13 +1642,13 @@ class TFRemBertForQuestionAnswering(TFRemBertPreTrainedModel, TFQuestionAnswerin
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
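The `| None` syntax is safe at runtime on Python 3.8/3.9 only because every touched module already begins with `from __future__ import annotations`, visible at the top of each file in this diff: under PEP 563, annotations are stored as strings and never evaluated at import time. A stand-alone illustration (assumed example, not repository code):

from __future__ import annotations

def call(use_cache: bool | None = None) -> bool | None:  # parsed as text, never evaluated here
    return use_cache

# Annotations stay plain strings until something explicitly evaluates them:
print(call.__annotations__)  # {'use_cache': 'bool | None', 'return': 'bool | None'}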
View File
@ -19,7 +19,6 @@ from __future__ import annotations
import math import math
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -596,12 +595,12 @@ class TFRobertaEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None, encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None, encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None, past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool], use_cache: bool | None,
output_attentions: bool, output_attentions: bool,
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -709,13 +708,13 @@ class TFRobertaMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
@ -1005,13 +1004,13 @@ class TFRobertaModel(TFRobertaPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]: ) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1156,12 +1155,12 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1259,14 +1258,14 @@ class TFRobertaForCausalLM(TFRobertaPreTrainedModel, TFCausalLanguageModelingLos
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]: ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1417,12 +1416,12 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1506,12 +1505,12 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1614,12 +1613,12 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1704,13 +1703,13 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
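The least obvious hunks are the nested `past_key_values` annotations: the inner `Union[np.ndarray, tf.Tensor]` becomes `np.ndarray | tf.Tensor` inside the subscript, and the wrapping `Optional[...]` becomes a trailing `| None`. The two spellings denote the same type; a sketch with stand-in element types in place of the array/tensor classes (requires Python 3.10+ to execute):

from typing import Optional, Union

OldPKV = Optional[tuple[tuple[Union[int, float]]]]  # int/float stand in for np.ndarray/tf.Tensor
NewPKV = tuple[tuple[int | float]] | None

assert OldPKV == NewPKV  # equal on Python 3.10+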
View File
@ -19,7 +19,6 @@ from __future__ import annotations
import math import math
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -597,12 +596,12 @@ class TFRobertaPreLayerNormEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None, encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None, encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None, past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool], use_cache: bool | None,
output_attentions: bool, output_attentions: bool,
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -707,13 +706,13 @@ class TFRobertaPreLayerNormMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
@ -1007,13 +1006,13 @@ class TFRobertaPreLayerNormModel(TFRobertaPreLayerNormPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]: ) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1165,12 +1164,12 @@ class TFRobertaPreLayerNormForMaskedLM(TFRobertaPreLayerNormPreTrainedModel, TFM
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1275,14 +1274,14 @@ class TFRobertaPreLayerNormForCausalLM(TFRobertaPreLayerNormPreTrainedModel, TFC
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]: ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1437,12 +1436,12 @@ class TFRobertaPreLayerNormForSequenceClassification(
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1529,12 +1528,12 @@ class TFRobertaPreLayerNormForMultipleChoice(TFRobertaPreLayerNormPreTrainedMode
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1638,12 +1637,12 @@ class TFRobertaPreLayerNormForTokenClassification(TFRobertaPreLayerNormPreTraine
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1729,13 +1728,13 @@ class TFRobertaPreLayerNormForQuestionAnswering(TFRobertaPreLayerNormPreTrainedM
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
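A sweep of this size (70 files in the commit) is typically produced by an AST-based rewriter such as pyupgrade or Ruff's pyupgrade-derived rules rather than by hand. For intuition only, a naive regex sketch covering the non-nested `Optional[X]` case; this is illustrative, not how the commit was necessarily made:

import re

def rewrite_simple_optional(source: str) -> str:
    # Handles only Optional[...] with no nested brackets; nested generics
    # like Optional[tuple[tuple[...]]] need a real AST-based tool.
    return re.sub(r"Optional\[([^\[\]]+)\]", r"\1 | None", source)

print(rewrite_simple_optional("use_cache: Optional[bool] = None,"))
# use_cache: bool | None = None,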
View File
@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import math import math
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -156,9 +155,9 @@ class TFRoFormerEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
@ -524,7 +523,7 @@ class TFRoFormerEncoder(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -715,11 +714,11 @@ class TFRoFormerMainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None: elif input_ids is not None:
@ -934,11 +933,11 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.roformer( outputs = self.roformer(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -993,12 +992,12 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1072,12 +1071,12 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFCausalLMOutput, tuple[tf.Tensor]]: ) -> TFCausalLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -1198,12 +1197,12 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1282,12 +1281,12 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1389,12 +1388,12 @@ class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1471,13 +1470,13 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
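One thing the commit leaves alone: return types such as `tuple[tf.Tensor]`, which strictly annotate a one-element tuple (a variable-length tuple is spelled `tuple[tf.Tensor, ...]`). Only the `Optional`/`Union` spellings change; pre-existing subscripts are preserved verbatim. A stand-in illustration of the distinction:

from __future__ import annotations
from typing import get_args

one_tensor: tuple[int]         # a 1-tuple: exactly one element
many_tensors: tuple[int, ...]  # variable length, homogeneous

print(get_args(tuple[int, ...]))  # (int, Ellipsis)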
View File
@ -21,7 +21,6 @@ from __future__ import annotations
import collections import collections
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -71,7 +70,7 @@ class TFSamVisionEncoderOutput(ModelOutput):
""" """
image_embeds: tf.Tensor | None = None image_embeds: tf.Tensor | None = None
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -105,8 +104,8 @@ class TFSamImageSegmentationOutput(ModelOutput):
heads. heads.
""" """
iou_scores: Optional[tf.Tensor] = None iou_scores: tf.Tensor | None = None
pred_masks: Optional[tf.Tensor] = None pred_masks: tf.Tensor | None = None
vision_hidden_states: tuple[tf.Tensor, ...] | None = None vision_hidden_states: tuple[tf.Tensor, ...] | None = None
vision_attentions: tuple[tf.Tensor, ...] | None = None vision_attentions: tuple[tf.Tensor, ...] | None = None
mask_decoder_attentions: tuple[tf.Tensor, ...] | None = None mask_decoder_attentions: tuple[tf.Tensor, ...] | None = None
@ -431,10 +430,10 @@ class TFSamTwoWayTransformer(keras.layers.Layer):
point_embeddings: tf.Tensor, point_embeddings: tf.Tensor,
image_embeddings: tf.Tensor, image_embeddings: tf.Tensor,
image_positional_embeddings: tf.Tensor, image_positional_embeddings: tf.Tensor,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -613,7 +612,7 @@ class TFSamMaskDecoder(keras.layers.Layer):
sparse_prompt_embeddings: tf.Tensor, sparse_prompt_embeddings: tf.Tensor,
dense_prompt_embeddings: tf.Tensor, dense_prompt_embeddings: tf.Tensor,
multimask_output: bool, multimask_output: bool,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
) -> tuple[tf.Tensor, tf.Tensor]: ) -> tuple[tf.Tensor, tf.Tensor]:
batch_size, num_channels, height, width = shape_list(image_embeddings) batch_size, num_channels, height, width = shape_list(image_embeddings)
point_batch_size = tf.math.maximum(1, tf.shape(sparse_prompt_embeddings)[1]) point_batch_size = tf.math.maximum(1, tf.shape(sparse_prompt_embeddings)[1])
@ -857,8 +856,8 @@ class TFSamPromptEncoder(keras.layers.Layer):
def call( def call(
self, self,
batch_size: Optional[int], batch_size: int | None,
input_points: Optional[tuple[tf.Tensor, tf.Tensor]], input_points: tuple[tf.Tensor, tf.Tensor] | None,
input_labels: tf.Tensor | None, input_labels: tf.Tensor | None,
input_boxes: tf.Tensor | None, input_boxes: tf.Tensor | None,
input_masks: tf.Tensor | None, input_masks: tf.Tensor | None,
@ -1119,8 +1118,8 @@ class TFSamVisionLayer(keras.layers.Layer):
def call( def call(
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
residual = hidden_states residual = hidden_states
@ -1268,11 +1267,11 @@ class TFSamVisionEncoder(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSamVisionEncoderOutput]: ) -> tuple | TFSamVisionEncoderOutput:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -1506,9 +1505,9 @@ class TFSamModel(TFSamPreTrainedModel):
def get_image_embeddings( def get_image_embeddings(
self, self,
pixel_values, pixel_values,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
): ):
r""" r"""
Returns the image embeddings by passing the pixel values through the vision encoder. Returns the image embeddings by passing the pixel values through the vision encoder.
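The output dataclasses above (`TFSamVisionEncoderOutput`, `TFSamImageSegmentationOutput`) now declare every field as `T | None = None`. The pattern in isolation, using a plain dataclass and `object` standing in for `tf.Tensor` (an assumed sketch, not the library's ModelOutput base):

from __future__ import annotations
from dataclasses import dataclass

@dataclass
class VisionEncoderOutput:
    image_embeds: object | None = None                # tf.Tensor in the real code
    last_hidden_state: object | None = None
    hidden_states: tuple[object, ...] | None = None
    attentions: tuple[object, ...] | None = None

out = VisionEncoderOutput(last_hidden_state="h")
print(out.image_embeds, out.last_hidden_state)  # None h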
View File
@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import math import math
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -169,7 +168,7 @@ class TFSegformerEfficientSelfAttention(keras.layers.Layer):
width: int, width: int,
output_attentions: bool = False, output_attentions: bool = False,
training: bool = False, training: bool = False,
) -> Union[tf.Tensor, tuple[tf.Tensor, tf.Tensor]]: ) -> tf.Tensor | tuple[tf.Tensor, tf.Tensor]:
batch_size = shape_list(hidden_states)[0] batch_size = shape_list(hidden_states)[0]
num_channels = shape_list(hidden_states)[2] num_channels = shape_list(hidden_states)[2]
@ -272,7 +271,7 @@ class TFSegformerAttention(keras.layers.Layer):
def call( def call(
self, hidden_states: tf.Tensor, height: int, width: int, output_attentions: bool = False self, hidden_states: tf.Tensor, height: int, width: int, output_attentions: bool = False
) -> Union[tf.Tensor, tuple[tf.Tensor, tf.Tensor]]: ) -> tf.Tensor | tuple[tf.Tensor, tf.Tensor]:
self_outputs = self.self(hidden_states, height, width, output_attentions) self_outputs = self.self(hidden_states, height, width, output_attentions)
attention_output = self.dense_output(self_outputs[0]) attention_output = self.dense_output(self_outputs[0])
@ -325,8 +324,8 @@ class TFSegformerMixFFN(keras.layers.Layer):
self, self,
config: SegformerConfig, config: SegformerConfig,
in_features: int, in_features: int,
hidden_features: Optional[int] = None, hidden_features: int | None = None,
out_features: Optional[int] = None, out_features: int | None = None,
**kwargs, **kwargs,
): ):
super().__init__(**kwargs) super().__init__(**kwargs)
@ -499,11 +498,11 @@ class TFSegformerEncoder(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: tf.Tensor, pixel_values: tf.Tensor,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
output_hidden_states: Optional[bool] = False, output_hidden_states: bool | None = False,
return_dict: Optional[bool] = True, return_dict: bool | None = True,
training: bool = False, training: bool = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None all_self_attentions = () if output_attentions else None
@ -580,11 +579,11 @@ class TFSegformerMainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: tf.Tensor, pixel_values: tf.Tensor,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -714,11 +713,11 @@ class TFSegformerModel(TFSegformerPreTrainedModel):
def call( def call(
self, self,
pixel_values: tf.Tensor, pixel_values: tf.Tensor,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
outputs = self.segformer( outputs = self.segformer(
pixel_values, pixel_values,
output_attentions=output_attentions, output_attentions=output_attentions,
@ -767,10 +766,10 @@ class TFSegformerForImageClassification(TFSegformerPreTrainedModel, TFSequenceCl
self, self,
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
) -> Union[tuple, TFSequenceClassifierOutput]: ) -> tuple | TFSequenceClassifierOutput:
outputs = self.segformer( outputs = self.segformer(
pixel_values, pixel_values,
output_attentions=output_attentions, output_attentions=output_attentions,
@ -951,10 +950,10 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel):
self, self,
pixel_values: tf.Tensor, pixel_values: tf.Tensor,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
) -> Union[tuple, TFSemanticSegmenterOutput]: ) -> tuple | TFSemanticSegmenterOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ..., Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,


@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import random import random
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -102,7 +101,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -173,7 +172,7 @@ class TFConv1dSubsampler(keras.layers.Layer):
class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer): class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer):
"""This module produces sinusoidal positional embeddings of any length.""" """This module produces sinusoidal positional embeddings of any length."""
def __init__(self, num_positions: int, embedding_dim: int, padding_idx: Optional[int] = None, **kwargs): def __init__(self, num_positions: int, embedding_dim: int, padding_idx: int | None = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.offset = 2 self.offset = 2
self.embedding_dim = embedding_dim self.embedding_dim = embedding_dim
@ -181,7 +180,7 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer):
self.embedding_weights = self._get_embedding(num_positions + self.offset, embedding_dim, padding_idx) self.embedding_weights = self._get_embedding(num_positions + self.offset, embedding_dim, padding_idx)
@staticmethod @staticmethod
def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None) -> tf.Tensor: def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: int | None = None) -> tf.Tensor:
""" """
Build sinusoidal embeddings. This matches the implementation in tensor2tensor, but differs slightly from the Build sinusoidal embeddings. This matches the implementation in tensor2tensor, but differs slightly from the
description in Section 3.5 of "Attention Is All You Need". description in Section 3.5 of "Attention Is All You Need".
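For reference, a NumPy sketch of that construction (even `dim` assumed; the padding-index zeroing is omitted):

import numpy as np

def sinusoidal_embeddings(num_positions: int, dim: int) -> np.ndarray:
    half = dim // 2                                                    # even dim assumed
    freqs = np.exp(np.arange(half) * -(np.log(10000.0) / (half - 1)))  # geometric frequency ladder
    angles = np.arange(num_positions)[:, None] * freqs[None, :]        # [pos, half]
    return np.concatenate([np.sin(angles), np.cos(angles)], axis=1)    # [pos, dim]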
@ -214,7 +213,7 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer):
@staticmethod @staticmethod
def create_position_ids_from_input_ids( def create_position_ids_from_input_ids(
input_ids: tf.Tensor, padding_idx: int, past_key_values_length: Optional[int] = 0 input_ids: tf.Tensor, padding_idx: int, past_key_values_length: int | None = 0
) -> tf.Tensor: ) -> tf.Tensor:
""" """
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding
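A worked example of that numbering rule, with illustrative values:

import tensorflow as tf

padding_idx = 1
input_ids = tf.constant([[5, 7, 9, padding_idx, padding_idx]])
mask = tf.cast(tf.math.not_equal(input_ids, padding_idx), tf.int32)
position_ids = tf.cumsum(mask, axis=1) * mask + padding_idx
# -> [[2, 3, 4, 1, 1]]: real tokens count up from padding_idx + 1, pads keep padding_idx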
@ -271,7 +270,7 @@ class TFSpeech2TextAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -1346,15 +1345,15 @@ class TFSpeech2TextModel(TFSpeech2TextPreTrainedModel):
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs, **kwargs,
) -> Union[tuple, TFSeq2SeqModelOutput]: ) -> tuple | TFSeq2SeqModelOutput:
outputs = self.model( outputs = self.model(
input_features=input_features, input_features=input_features,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1445,16 +1444,16 @@ class TFSpeech2TextForConditionalGeneration(TFSpeech2TextPreTrainedModel, TFCaus
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs, **kwargs,
) -> Union[tuple, TFSeq2SeqLMOutput]: ) -> tuple | TFSeq2SeqLMOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,


@ -22,7 +22,7 @@ import warnings
from collections.abc import Iterable from collections.abc import Iterable
from dataclasses import dataclass from dataclasses import dataclass
from functools import partial from functools import partial
from typing import Any, Callable, Optional, Union from typing import Any, Callable
import tensorflow as tf import tensorflow as tf
@ -92,7 +92,7 @@ class TFSwinEncoderOutput(ModelOutput):
include the spatial dimensions. include the spatial dimensions.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None
@ -127,7 +127,7 @@ class TFSwinModelOutput(ModelOutput):
include the spatial dimensions. include the spatial dimensions.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -164,7 +164,7 @@ class TFSwinMaskedImageModelingOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
reconstruction: Optional[tf.Tensor] = None reconstruction: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None
@ -209,7 +209,7 @@ class TFSwinImageClassifierOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None
@ -308,7 +308,7 @@ class TFSwinEmbeddings(keras.layers.Layer):
self.dropout.build(None) self.dropout.build(None)
def call( def call(
self, pixel_values: tf.Tensor, bool_masked_pos: Optional[bool] = None, training: bool = False self, pixel_values: tf.Tensor, bool_masked_pos: bool | None = None, training: bool = False
) -> tuple[tf.Tensor, tuple[int, int]]: ) -> tuple[tf.Tensor, tuple[int, int]]:
embeddings, output_dimensions = self.patch_embeddings(pixel_values, training=training) embeddings, output_dimensions = self.patch_embeddings(pixel_values, training=training)
embeddings = self.norm(embeddings, training=training) embeddings = self.norm(embeddings, training=training)
@ -413,7 +413,7 @@ class TFSwinPatchMerging(keras.layers.Layer):
""" """
def __init__( def __init__(
self, input_resolution: tuple[int, int], dim: int, norm_layer: Optional[Callable] = None, **kwargs self, input_resolution: tuple[int, int], dim: int, norm_layer: Callable | None = None, **kwargs
) -> None: ) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
self.input_resolution = input_resolution self.input_resolution = input_resolution
@ -475,7 +475,7 @@ class TFSwinPatchMerging(keras.layers.Layer):
class TFSwinDropPath(keras.layers.Layer): class TFSwinDropPath(keras.layers.Layer):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
def __init__(self, drop_prob: Optional[float] = None, scale_by_keep: bool = True, **kwargs) -> None: def __init__(self, drop_prob: float | None = None, scale_by_keep: bool = True, **kwargs) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
self.drop_prob = drop_prob self.drop_prob = drop_prob
self.scale_by_keep = scale_by_keep self.scale_by_keep = scale_by_keep
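A minimal sketch of the computation such a layer performs at train time: one Bernoulli keep/drop draw per sample, with surviving paths rescaled by the keep probability unless `scale_by_keep` is disabled:

import tensorflow as tf

def drop_path(x: tf.Tensor, drop_prob: float, training: bool = False, scale_by_keep: bool = True) -> tf.Tensor:
    if drop_prob == 0.0 or not training:
        return x
    keep_prob = 1.0 - drop_prob
    shape = [tf.shape(x)[0]] + [1] * (len(x.shape) - 1)   # one draw per sample, broadcast over the rest
    random_tensor = keep_prob + tf.random.uniform(shape, 0.0, 1.0, dtype=x.dtype)
    random_tensor = tf.floor(random_tensor)               # 0 (dropped) or 1 (kept)
    if scale_by_keep:
        x = x / keep_prob
    return x * random_tensor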
@ -908,7 +908,7 @@ class TFSwinStage(keras.layers.Layer):
depth: int, depth: int,
num_heads: int, num_heads: int,
drop_path: list[float], drop_path: list[float],
downsample: Optional[Callable], downsample: Callable | None,
**kwargs, **kwargs,
) -> None: ) -> None:
super().__init__(**kwargs) super().__init__(**kwargs)
@ -945,7 +945,7 @@ class TFSwinStage(keras.layers.Layer):
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
input_dimensions: tuple[int, int], input_dimensions: tuple[int, int],
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor, ...]: ) -> tuple[tf.Tensor, ...]:
height, width = input_dimensions height, width = input_dimensions
@ -1015,7 +1015,7 @@ class TFSwinEncoder(keras.layers.Layer):
output_hidden_states: bool = False, output_hidden_states: bool = False,
return_dict: bool = True, return_dict: bool = True,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor, ...], TFSwinEncoderOutput]: ) -> tuple[tf.Tensor, ...] | TFSwinEncoderOutput:
all_input_dimensions = () all_input_dimensions = ()
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_reshaped_hidden_states = () if output_hidden_states else None all_reshaped_hidden_states = () if output_hidden_states else None
@ -1157,9 +1157,9 @@ class AdaptiveAveragePooling1D(keras.layers.Layer):
def __init__( def __init__(
self, self,
output_size: Union[int, Iterable[int]], output_size: int | Iterable[int],
reduce_function: Callable = tf.reduce_mean, reduce_function: Callable = tf.reduce_mean,
data_format: Optional[str] = None, data_format: str | None = None,
**kwargs, **kwargs,
) -> None: ) -> None:
self.data_format = normalize_data_format(data_format) self.data_format = normalize_data_format(data_format)
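The layer collapses a variable-length axis to a fixed `output_size`, applying `reduce_function` per bin. A simplified non-overlapping sketch for 7 steps pooled down to 3:

import tensorflow as tf

x = tf.random.uniform((2, 7, 32))                          # [batch, steps, channels]
bins = tf.split(x, num_or_size_splits=[3, 2, 2], axis=1)   # 7 steps -> 3 uneven bins
pooled = tf.stack([tf.reduce_mean(b, axis=1) for b in bins], axis=1)
print(pooled.shape)                                        # (2, 3, 32)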
@ -1225,7 +1225,7 @@ class TFSwinMainLayer(keras.layers.Layer):
for layer, heads in heads_to_prune.items(): for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads) self.encoder.layer[layer].attention.prune_heads(heads)
def get_head_mask(self, head_mask: Optional[Any]) -> list: def get_head_mask(self, head_mask: Any | None) -> list:
if head_mask is not None: if head_mask is not None:
raise NotImplementedError raise NotImplementedError
return [None] * len(self.config.depths) return [None] * len(self.config.depths)
@ -1236,11 +1236,11 @@ class TFSwinMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFSwinModelOutput, tuple[tf.Tensor, ...]]: ) -> TFSwinModelOutput | tuple[tf.Tensor, ...]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -1332,11 +1332,11 @@ class TFSwinModel(TFSwinPreTrainedModel):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFSwinModelOutput, tuple[tf.Tensor, ...]]: ) -> TFSwinModelOutput | tuple[tf.Tensor, ...]:
r""" r"""
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*): bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1449,11 +1449,11 @@ class TFSwinForMaskedImageModeling(TFSwinPreTrainedModel):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None, bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple, TFSwinMaskedImageModelingOutput]: ) -> tuple | TFSwinMaskedImageModelingOutput:
r""" r"""
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`): bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
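For illustration, a random mask of the kind this argument expects; the image and patch sizes are assumed values:

import tensorflow as tf

num_patches = (192 // 4) ** 2                                  # assumed image_size=192, patch_size=4
bool_masked_pos = tf.random.uniform((1, num_patches)) < 0.6    # mask roughly 60% of patches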
@ -1583,11 +1583,11 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor, ...], TFSwinImageClassifierOutput]: ) -> tuple[tf.Tensor, ...] | TFSwinImageClassifierOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,


@ -21,7 +21,6 @@ import copy
import itertools import itertools
import math import math
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -1210,15 +1209,15 @@ class TFT5Model(TFT5PreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSeq2SeqModelOutput]: ) -> tuple | TFSeq2SeqModelOutput:
r""" r"""
Returns: Returns:
@ -1387,16 +1386,16 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None, encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFSeq2SeqLMOutput]: ) -> tuple | TFSeq2SeqLMOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -1620,11 +1619,11 @@ class TFT5EncoderModel(TFT5PreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None, attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutput]: ) -> tuple | TFBaseModelOutput:
r""" r"""
Returns: Returns:


@ -19,7 +19,6 @@ from __future__ import annotations
import enum import enum
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -111,7 +110,7 @@ class TFTableQuestionAnsweringOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
logits_aggregation: tf.Tensor | None = None logits_aggregation: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -170,10 +169,10 @@ class TFTapasEmbeddings(keras.layers.Layer):
def call( def call(
self, self,
input_ids: Optional[tf.Tensor] = None, input_ids: tf.Tensor | None = None,
position_ids: Optional[tf.Tensor] = None, position_ids: tf.Tensor | None = None,
token_type_ids: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None,
inputs_embeds: Optional[tf.Tensor] = None, inputs_embeds: tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> tf.Tensor: ) -> tf.Tensor:
""" """
@ -627,12 +626,12 @@ class TFTapasEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None, encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None, encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None, past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool], use_cache: bool | None,
output_attentions: bool, output_attentions: bool,
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -865,11 +864,11 @@ class TFTapasMainLayer(keras.layers.Layer):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None: elif input_ids is not None:
@ -1100,11 +1099,11 @@ class TFTapasModel(TFTapasPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1182,12 +1181,12 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1404,12 +1403,12 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
float_answer: np.ndarray | tf.Tensor | None = None, float_answer: np.ndarray | tf.Tensor | None = None,
numeric_values: np.ndarray | tf.Tensor | None = None, numeric_values: np.ndarray | tf.Tensor | None = None,
numeric_values_scale: np.ndarray | tf.Tensor | None = None, numeric_values_scale: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTableQuestionAnsweringOutput, tuple[tf.Tensor]]: ) -> TFTableQuestionAnsweringOutput | tuple[tf.Tensor]:
r""" r"""
table_mask (`tf.Tensor` of shape `(batch_size, seq_length)`, *optional*): table_mask (`tf.Tensor` of shape `(batch_size, seq_length)`, *optional*):
Mask for the table. Indicates which tokens belong to the table (1). Question tokens, table headers and Mask for the table. Indicates which tokens belong to the table (1). Question tokens, table headers and
@ -1731,12 +1730,12 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,


@ -18,7 +18,6 @@ from __future__ import annotations
import re import re
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -187,9 +186,9 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
def __init__( def __init__(
self, self,
config: Optional[PretrainedConfig] = None, config: PretrainedConfig | None = None,
encoder: Optional[TFPreTrainedModel] = None, encoder: TFPreTrainedModel | None = None,
decoder: Optional[TFPreTrainedModel] = None, decoder: TFPreTrainedModel | None = None,
): ):
if config is None and (encoder is None or decoder is None): if config is None and (encoder is None or decoder is None):
raise ValueError("Either a configuration or an encoder and a decoder has to be provided.") raise ValueError("Either a configuration or an encoder and a decoder has to be provided.")
@ -309,8 +308,8 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
@classmethod @classmethod
def from_encoder_decoder_pretrained( def from_encoder_decoder_pretrained(
cls, cls,
encoder_pretrained_model_name_or_path: Optional[str] = None, encoder_pretrained_model_name_or_path: str | None = None,
decoder_pretrained_model_name_or_path: Optional[str] = None, decoder_pretrained_model_name_or_path: str | None = None,
*model_args, *model_args,
**kwargs, **kwargs,
) -> TFPreTrainedModel: ) -> TFPreTrainedModel:
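Typical usage, sketched with assumed checkpoint names; any vision encoder can be paired with any causal-LM decoder this way:

from transformers import TFVisionEncoderDecoderModel

model = TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    "google/vit-base-patch16-224-in21k",   # assumed encoder checkpoint
    "openai-community/gpt2",               # assumed decoder checkpoint
)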
@ -462,17 +461,17 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
pixel_values: np.ndarray | tf.Tensor | None = None, pixel_values: np.ndarray | tf.Tensor | None = None,
decoder_input_ids: np.ndarray | tf.Tensor | None = None, decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None, decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None, encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs, **kwargs,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]: ) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:


@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import re import re
from typing import Optional, Union
import tensorflow as tf import tensorflow as tf
@ -178,9 +177,9 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
def __init__( def __init__(
self, self,
config: Optional[VisionTextDualEncoderConfig] = None, config: VisionTextDualEncoderConfig | None = None,
vision_model: Optional[TFPreTrainedModel] = None, vision_model: TFPreTrainedModel | None = None,
text_model: Optional[TFPreTrainedModel] = None, text_model: TFPreTrainedModel | None = None,
): ):
if config is None and (vision_model is None or text_model is None): if config is None and (vision_model is None or text_model is None):
raise ValueError("Either a configuration or an vision and a text model has to be provided") raise ValueError("Either a configuration or an vision and a text model has to be provided")
@ -351,13 +350,13 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
pixel_values: tf.Tensor | None = None, pixel_values: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
return_loss: Optional[bool] = None, return_loss: bool | None = None,
token_type_ids: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], TFCLIPOutput]: ) -> tuple[tf.Tensor] | TFCLIPOutput:
r""" r"""
Returns: Returns:
@ -465,8 +464,8 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
@classmethod @classmethod
def from_vision_text_pretrained( def from_vision_text_pretrained(
cls, cls,
vision_model_name_or_path: Optional[str] = None, vision_model_name_or_path: str | None = None,
text_model_name_or_path: Optional[str] = None, text_model_name_or_path: str | None = None,
*model_args, *model_args,
**kwargs, **kwargs,
) -> TFPreTrainedModel: ) -> TFPreTrainedModel:
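Typical usage, again with assumed checkpoint names:

from transformers import TFVisionTextDualEncoderModel

model = TFVisionTextDualEncoderModel.from_vision_text_pretrained(
    "openai/clip-vit-base-patch32",        # assumed vision checkpoint
    "google-bert/bert-base-uncased",       # assumed text checkpoint
)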


@ -18,7 +18,6 @@ from __future__ import annotations
import collections.abc import collections.abc
import math import math
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -514,7 +513,7 @@ class TFViTEncoder(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -583,12 +582,12 @@ class TFViTMainLayer(keras.layers.Layer):
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
interpolate_pos_encoding: Optional[bool] = None, interpolate_pos_encoding: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if pixel_values is None: if pixel_values is None:
raise ValueError("You have to specify pixel_values") raise ValueError("You have to specify pixel_values")
@ -756,12 +755,12 @@ class TFViTModel(TFViTPreTrainedModel):
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
interpolate_pos_encoding: Optional[bool] = None, interpolate_pos_encoding: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.vit( outputs = self.vit(
pixel_values=pixel_values, pixel_values=pixel_values,
head_mask=head_mask, head_mask=head_mask,
@ -854,13 +853,13 @@ class TFViTForImageClassification(TFViTPreTrainedModel, TFSequenceClassification
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
interpolate_pos_encoding: Optional[bool] = None, interpolate_pos_encoding: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ..., Labels for computing the image classification/regression loss. Indices should be in `[0, ...,


@ -20,7 +20,6 @@ import collections.abc
import math import math
from copy import deepcopy from copy import deepcopy
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -74,9 +73,9 @@ class TFViTMAEModelOutput(ModelOutput):
the self-attention heads. the self-attention heads.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
mask: Optional[tf.Tensor] = None mask: tf.Tensor | None = None
ids_restore: Optional[tf.Tensor] = None ids_restore: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -99,7 +98,7 @@ class TFViTMAEDecoderOutput(ModelOutput):
the self-attention heads. the self-attention heads.
""" """
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -129,9 +128,9 @@ class TFViTMAEForPreTrainingOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mask: Optional[tf.Tensor] = None mask: tf.Tensor | None = None
ids_restore: Optional[tf.Tensor] = None ids_restore: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -314,7 +313,7 @@ class TFViTMAEEmbeddings(keras.layers.Layer):
return sequence_unmasked, mask, ids_restore return sequence_unmasked, mask, ids_restore
def call( def call(
self, pixel_values: tf.Tensor, noise: Optional[tf.Tensor] = None, interpolate_pos_encoding: bool = False self, pixel_values: tf.Tensor, noise: tf.Tensor | None = None, interpolate_pos_encoding: bool = False
) -> tf.Tensor: ) -> tf.Tensor:
batch_size, num_channels, height, width = shape_list(pixel_values) batch_size, num_channels, height, width = shape_list(pixel_values)
embeddings = self.patch_embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding) embeddings = self.patch_embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)
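The `noise` argument drives the MAE-style random masking behind those three outputs. A simplified sketch, assuming static shapes and ignoring the class token:

from __future__ import annotations

import tensorflow as tf

def random_masking(sequence: tf.Tensor, mask_ratio: float = 0.75, noise: tf.Tensor | None = None):
    batch = tf.shape(sequence)[0]
    seq_len = int(sequence.shape[1])                 # static length assumed
    len_keep = int(seq_len * (1 - mask_ratio))
    if noise is None:
        noise = tf.random.uniform((batch, seq_len))  # low noise = patch is kept
    ids_shuffle = tf.argsort(noise, axis=1)
    ids_restore = tf.argsort(ids_shuffle, axis=1)    # inverse permutation
    kept = tf.gather(sequence, ids_shuffle[:, :len_keep], axis=1, batch_dims=1)
    mask = tf.concat([tf.zeros((batch, len_keep)), tf.ones((batch, seq_len - len_keep))], axis=1)
    mask = tf.gather(mask, ids_restore, axis=1, batch_dims=1)  # 1 = masked, in original order
    return kept, mask, ids_restore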
@ -708,7 +707,7 @@ class TFViTMAEEncoder(keras.layers.Layer):
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
@ -775,14 +774,14 @@ class TFViTMAEMainLayer(keras.layers.Layer):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
noise: Optional[tf.Tensor] = None, noise: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
) -> Union[TFViTMAEModelOutput, tuple[tf.Tensor]]: ) -> TFViTMAEModelOutput | tuple[tf.Tensor]:
embedding_output, mask, ids_restore = self.embeddings( embedding_output, mask, ids_restore = self.embeddings(
pixel_values=pixel_values, pixel_values=pixel_values,
training=training, training=training,
@ -943,14 +942,14 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
noise: Optional[tf.Tensor] = None, noise: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
) -> Union[TFViTMAEModelOutput, tuple[tf.Tensor]]: ) -> TFViTMAEModelOutput | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:
@ -1219,7 +1218,7 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel):
) )
return patchified_pixel_values return patchified_pixel_values
def unpatchify(self, patchified_pixel_values, original_image_size: Optional[tuple[int, int]] = None): def unpatchify(self, patchified_pixel_values, original_image_size: tuple[int, int] | None = None):
""" """
Args: Args:
patchified_pixel_values (`tf.Tensor` of shape `(batch_size, num_patches, patch_size**2 * num_channels)`: patchified_pixel_values (`tf.Tensor` of shape `(batch_size, num_patches, patch_size**2 * num_channels)`:
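A sketch of the shape bookkeeping being reversed, for an assumed square 14x14 patch grid (channels-last for brevity):

import tensorflow as tf

patch_size, num_channels = 16, 3
patchified = tf.random.uniform((2, 14 * 14, patch_size**2 * num_channels))
x = tf.reshape(patchified, (2, 14, 14, patch_size, patch_size, num_channels))
x = tf.einsum("nhwpqc->nhpwqc", x)          # interleave grid and intra-patch axes
images = tf.reshape(x, (2, 14 * patch_size, 14 * patch_size, num_channels))
print(images.shape)                         # (2, 224, 224, 3)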
@ -1294,14 +1293,14 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel):
def call( def call(
self, self,
pixel_values: TFModelInputType | None = None, pixel_values: TFModelInputType | None = None,
noise: Optional[tf.Tensor] = None, noise: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
interpolate_pos_encoding: bool = False, interpolate_pos_encoding: bool = False,
) -> Union[TFViTMAEForPreTrainingOutput, tuple[tf.Tensor]]: ) -> TFViTMAEForPreTrainingOutput | tuple[tf.Tensor]:
r""" r"""
Returns: Returns:


@ -18,7 +18,7 @@ from __future__ import annotations
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Optional, Union from typing import Any
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -78,8 +78,8 @@ class TFWav2Vec2BaseModelOutput(ModelOutput):
heads. heads.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
extract_features: Optional[tf.Tensor] = None extract_features: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None attentions: tuple[tf.Tensor] | None = None
@ -184,7 +184,7 @@ def _compute_mask_indices(
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -729,7 +729,7 @@ class TFWav2Vec2Attention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -922,7 +922,7 @@ class TFWav2Vec2EncoderLayer(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
attn_residual = hidden_states attn_residual = hidden_states
@ -981,7 +981,7 @@ class TFWav2Vec2EncoderLayerStableLayerNorm(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: bool = False, training: bool = False,
) -> tuple[tf.Tensor]: ) -> tuple[tf.Tensor]:
attn_residual = hidden_states attn_residual = hidden_states
@ -1031,11 +1031,11 @@ class TFWav2Vec2Encoder(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
output_hidden_states: Optional[bool] = False, output_hidden_states: bool | None = False,
return_dict: Optional[bool] = True, return_dict: bool | None = True,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None all_self_attentions = () if output_attentions else None
@ -1113,11 +1113,11 @@ class TFWav2Vec2EncoderStableLayerNorm(keras.layers.Layer):
self, self,
hidden_states: tf.Tensor, hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
output_hidden_states: Optional[bool] = False, output_hidden_states: bool | None = False,
return_dict: Optional[bool] = True, return_dict: bool | None = True,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None all_self_attentions = () if output_attentions else None
@ -1281,9 +1281,9 @@ class TFWav2Vec2MainLayer(keras.layers.Layer):
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
**kwargs: Any, **kwargs: Any,
): ):
@ -1516,11 +1516,11 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel):
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
""" """
Returns: Returns:
@ -1622,12 +1622,12 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel):
position_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
labels: tf.Tensor | None = None, labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFCausalLMOutput, tuple[tf.Tensor]]: ) -> TFCausalLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
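The signatures above follow the pattern applied throughout this commit: `Optional[X]` becomes `X | None` (PEP 604) while the defaults are left untouched. A minimal runnable sketch of the same shape, with a toy function standing in for the Keras layer `call` methods:

from __future__ import annotations  # keeps `X | None` usable in annotations on Python 3.9

def toy_call(hidden_states: list[float], training: bool | None = False) -> list[float]:
    # toy stand-in for the layer signatures above: only the annotation
    # spelling changed, the `training=False` default is unchanged
    if training:
        return [h * 0.9 for h in hidden_states]
    return hidden_states

print(toy_call([1.0, 2.0]))                 # [1.0, 2.0]
print(toy_call([1.0, 2.0], training=True))  # [0.9, 1.8]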

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import math import math
import random import random
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -111,7 +110,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -129,7 +128,7 @@ class TFWhisperPositionalEmbedding(keras.layers.Layer):
self, self,
num_positions: int, num_positions: int,
embedding_dim: int, embedding_dim: int,
padding_idx: Optional[int] = None, padding_idx: int | None = None,
embedding_initializer=None, embedding_initializer=None,
**kwargs, **kwargs,
): ):
@ -197,7 +196,7 @@ class TFWhisperAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -1262,15 +1261,15 @@ class TFWhisperModel(TFWhisperPreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, encoder_outputs: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: Optional[tuple[Union[np.ndarray, tf.Tensor]]] = None, decoder_inputs_embeds: tuple[np.ndarray | tf.Tensor] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], TFSeq2SeqModelOutput]: ) -> tuple[tf.Tensor] | TFSeq2SeqModelOutput:
r""" r"""
Returns: Returns:
@ -1385,16 +1384,16 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None, decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, encoder_outputs: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: Optional[tuple[Union[np.ndarray, tf.Tensor]]] = None, decoder_inputs_embeds: tuple[np.ndarray | tf.Tensor] | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[tuple[tf.Tensor], TFSeq2SeqLMOutput]: ) -> tuple[tf.Tensor] | TFSeq2SeqLMOutput:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the language modeling loss. Indices should either be in `[0, ..., config.vocab_size]` Labels for computing the language modeling loss. Indices should either be in `[0, ..., config.vocab_size]`
@ -1473,15 +1472,15 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua
def generate( def generate(
self, self,
inputs: Optional[tf.Tensor] = None, inputs: tf.Tensor | None = None,
generation_config: Optional[GenerationConfig] = None, generation_config: GenerationConfig | None = None,
logits_processor: Optional[TFLogitsProcessorList] = None, logits_processor: TFLogitsProcessorList | None = None,
seed: Optional[list[int]] = None, seed: list[int] | None = None,
return_timestamps: Optional[bool] = None, return_timestamps: bool | None = None,
task: Optional[str] = None, task: str | None = None,
language: Optional[str] = None, language: str | None = None,
is_multilingual: Optional[bool] = None, is_multilingual: bool | None = None,
prompt_ids: Optional[tf.Tensor] = None, prompt_ids: tf.Tensor | None = None,
return_token_timestamps=None, return_token_timestamps=None,
**kwargs, **kwargs,
): ):
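The deepest rewrites in this file are the cache annotations, where `Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]]` flattens to `tuple[tuple[np.ndarray | tf.Tensor]] | None`. The rewrite is mechanical because, on Python 3.10+, the `|` operator builds unions that compare equal to their `typing` counterparts; a quick sketch with builtin types standing in for the array classes:

import typing

# PEP 604 unions and typing.Union/Optional denote the same thing (Python 3.10+)
print((int | float) == typing.Union[int, float])  # True
print((int | None) == typing.Optional[int])       # True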

View File

@ -18,7 +18,7 @@ from __future__ import annotations
import math import math
import random import random
from typing import Any, Optional, Union from typing import Any
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -57,7 +57,7 @@ _CONFIG_FOR_DOC = "XGLMConfig"
LARGE_NEGATIVE = -1e8 LARGE_NEGATIVE = -1e8
def create_sinusoidal_positions(num_positions: int, embedding_dim: int, padding_idx: Optional[int]) -> tf.Tensor: def create_sinusoidal_positions(num_positions: int, embedding_dim: int, padding_idx: int | None) -> tf.Tensor:
half_dim = embedding_dim // 2 half_dim = embedding_dim // 2
emb = math.log(10000) / (half_dim - 1) emb = math.log(10000) / (half_dim - 1)
emb = tf.exp(tf.range(half_dim, dtype=tf.float32) * -emb) emb = tf.exp(tf.range(half_dim, dtype=tf.float32) * -emb)
@ -81,7 +81,7 @@ def create_sinusoidal_positions(num_positions: int, embedding_dim: int, padding_
def _create_position_ids_from_input_ids( def _create_position_ids_from_input_ids(
input_ids: tf.Tensor, past_key_values_length: int, padding_idx: Optional[int] input_ids: tf.Tensor, past_key_values_length: int, padding_idx: int | None
) -> tf.Tensor: ) -> tf.Tensor:
""" """
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
@ -94,7 +94,7 @@ def _create_position_ids_from_input_ids(
def _create_position_ids_from_inputs_embeds( def _create_position_ids_from_inputs_embeds(
inputs_embeds: tf.Tensor, past_key_values_length: int, padding_idx: Optional[int] inputs_embeds: tf.Tensor, past_key_values_length: int, padding_idx: int | None
) -> tf.Tensor: ) -> tf.Tensor:
""" """
Args: Args:
@ -129,7 +129,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask # Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None): def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
""" """
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
""" """
@ -184,7 +184,7 @@ class TFXGLMAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None, past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None, attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]: ) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel""" """Input shape: Batch x Time x Channel"""
@ -356,7 +356,7 @@ class TFXGLMDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None, layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None, cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None, past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]: ) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
""" """
Args: Args:
@ -459,7 +459,7 @@ class TFXGLMMainLayer(keras.layers.Layer):
config_class = XGLMConfig config_class = XGLMConfig
def __init__( def __init__(
self, config: XGLMConfig, embed_tokens: Optional[TFSharedEmbeddings] = None, *inputs, **kwargs: Any self, config: XGLMConfig, embed_tokens: TFSharedEmbeddings | None = None, *inputs, **kwargs: Any
) -> None: ) -> None:
super().__init__(*inputs, **kwargs) super().__init__(*inputs, **kwargs)
@ -525,15 +525,15 @@ class TFXGLMMainLayer(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs: Any, **kwargs: Any,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -796,7 +796,7 @@ class TFXGLMModel(TFXGLMPreTrainedModel):
""" """
def __init__( def __init__(
self, config: XGLMConfig, embed_tokens: Optional[TFSharedEmbeddings] = None, *inputs: Any, **kwargs: Any self, config: XGLMConfig, embed_tokens: TFSharedEmbeddings | None = None, *inputs: Any, **kwargs: Any
) -> None: ) -> None:
super().__init__(config, *inputs, **kwargs) super().__init__(config, *inputs, **kwargs)
@ -818,15 +818,15 @@ class TFXGLMModel(TFXGLMPreTrainedModel):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs: Any, **kwargs: Any,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
outputs = self.model( outputs = self.model(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -872,7 +872,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
] ]
def __init__( def __init__(
self, config: XGLMConfig, embed_tokens: Optional[TFSharedEmbeddings] = None, *inputs: Any, **kwargs: Any self, config: XGLMConfig, embed_tokens: TFSharedEmbeddings | None = None, *inputs: Any, **kwargs: Any
) -> None: ) -> None:
super().__init__(config, *inputs, **kwargs) super().__init__(config, *inputs, **kwargs)
@ -929,16 +929,16 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
**kwargs: Any, **kwargs: Any,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]: ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r""" r"""
labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
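Note that this header keeps `from typing import Any` while dropping `Optional` and `Union`: `Any` has no builtin or operator spelling, so it survives the migration. A hedged sketch of the resulting import shape, with a toy function echoing the `call(..., **kwargs: Any)` signatures above:

from __future__ import annotations

from typing import Any  # Any still comes from typing; Optional/Union no longer do

def toy_forward(inputs: Any, attention_mask: list[int] | None = None, **kwargs: Any) -> Any:
    # hypothetical stand-in, not the model code: drop positions where the mask is 0
    if attention_mask is None:
        return inputs
    return [x for x, keep in zip(inputs, attention_mask) if keep]

print(toy_forward([10, 20, 30], [1, 0, 1]))  # [10, 30]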

View File

@ -21,7 +21,6 @@ from __future__ import annotations
import itertools import itertools
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -400,7 +399,7 @@ class TFXLMMainLayer(keras.layers.Layer):
output_hidden_states=None, output_hidden_states=None,
return_dict=None, return_dict=None,
training=False, training=False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]: ) -> TFBaseModelOutput | tuple[tf.Tensor]:
# removed: src_enc=None, src_len=None # removed: src_enc=None, src_len=None
if input_ids is not None and inputs_embeds is not None: if input_ids is not None and inputs_embeds is not None:
@ -599,7 +598,7 @@ class TFXLMWithLMHeadModelOutput(ModelOutput):
heads. heads.
""" """
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -881,14 +880,14 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None, lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None, cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLMWithLMHeadModelOutput, tuple[tf.Tensor]]: ) -> TFXLMWithLMHeadModelOutput | tuple[tf.Tensor]:
transformer_outputs = self.transformer( transformer_outputs = self.transformer(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -957,15 +956,15 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None, lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None, cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1068,15 +1067,15 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None, lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None, cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
if input_ids is not None: if input_ids is not None:
num_choices = shape_list(input_ids)[1] num_choices = shape_list(input_ids)[1]
seq_length = shape_list(input_ids)[2] seq_length = shape_list(input_ids)[2]
@ -1184,15 +1183,15 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None, lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None, cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1273,16 +1272,16 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL
token_type_ids: np.ndarray | tf.Tensor | None = None, token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None, lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None, cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
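The `cache: Optional[dict[str, tf.Tensor]]` parameters rewritten in this file combine two modernizations in one annotation: builtin generics (PEP 585, `dict[str, ...]` rather than `typing.Dict`) and the union operator (PEP 604). A small runnable sketch with a plain dict standing in for the tensor cache:

from __future__ import annotations

def read_cache(cache: dict[str, int] | None = None, key: str = "hidden") -> int | None:
    # hypothetical helper: None means "no cache at all", distinct from an empty dict
    if cache is None:
        return None
    return cache.get(key)

print(read_cache({"hidden": 42}))  # 42
print(read_cache())                # None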

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math import math
import warnings import warnings
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -686,12 +685,12 @@ class TFXLMRobertaEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None, encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None, encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None, past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool], use_cache: bool | None,
output_attentions: bool, output_attentions: bool,
output_hidden_states: bool, output_hidden_states: bool,
return_dict: bool, return_dict: bool,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -800,13 +799,13 @@ class TFXLMRobertaMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]: ) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder: if not self.config.is_decoder:
use_cache = False use_cache = False
@ -1000,13 +999,13 @@ class TFXLMRobertaModel(TFXLMRobertaPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]: ) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1153,12 +1152,12 @@ class TFXLMRobertaForMaskedLM(TFXLMRobertaPreTrainedModel, TFMaskedLanguageModel
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]: ) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ..., Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1261,14 +1260,14 @@ class TFXLMRobertaForCausalLM(TFXLMRobertaPreTrainedModel, TFCausalLanguageModel
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None, encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: Optional[bool] = None, use_cache: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]: ) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r""" r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1421,12 +1420,12 @@ class TFXLMRobertaForSequenceClassification(TFXLMRobertaPreTrainedModel, TFSeque
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]: ) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1513,12 +1512,12 @@ class TFXLMRobertaForMultipleChoice(TFXLMRobertaPreTrainedModel, TFMultipleChoic
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]: ) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1622,12 +1621,12 @@ class TFXLMRobertaForTokenClassification(TFXLMRobertaPreTrainedModel, TFTokenCla
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]: ) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1713,13 +1712,13 @@ class TFXLMRobertaForQuestionAnswering(TFXLMRobertaPreTrainedModel, TFQuestionAn
position_ids: np.ndarray | tf.Tensor | None = None, position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False, training: bool | None = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]: ) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
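Beyond annotations, `X | Y` unions are first-class objects on Python 3.10+: unlike `typing.Optional`, they can be passed straight to `isinstance`. A sketch unrelated to the model code itself:

def normalize(x: int | str | None) -> str:
    if isinstance(x, int | str):  # a types.UnionType works directly with isinstance
        return str(x)
    return ""  # x is None here

print(normalize(7), repr(normalize(None)))  # 7 ''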

View File

@ -21,7 +21,6 @@ from __future__ import annotations
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Union
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@ -201,7 +200,7 @@ class TFXLNetRelativeAttention(keras.layers.Layer):
mems: np.ndarray | tf.Tensor | None = None, mems: np.ndarray | tf.Tensor | None = None,
target_mapping: np.ndarray | tf.Tensor | None = None, target_mapping: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: bool = False, training: bool = False,
): ):
if g is not None: if g is not None:
@ -390,7 +389,7 @@ class TFXLNetLayer(keras.layers.Layer):
mems: np.ndarray | tf.Tensor | None = None, mems: np.ndarray | tf.Tensor | None = None,
target_mapping: np.ndarray | tf.Tensor | None = None, target_mapping: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = False, output_attentions: bool | None = False,
training: bool = False, training: bool = False,
): ):
outputs = self.rel_attn( outputs = self.rel_attn(
@ -631,10 +630,10 @@ class TFXLNetMainLayer(keras.layers.Layer):
input_mask: np.ndarray | tf.Tensor | None = None, input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
): ):
if training and use_mems is None: if training and use_mems is None:
@ -863,7 +862,7 @@ class TFXLNetModelOutput(ModelOutput):
heads. heads.
""" """
last_hidden_state: Optional[tf.Tensor] = None last_hidden_state: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -900,7 +899,7 @@ class TFXLNetLMHeadModelOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -934,7 +933,7 @@ class TFXLNetForSequenceClassificationOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -968,7 +967,7 @@ class TFXLNetForTokenClassificationOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -1004,7 +1003,7 @@ class TFXLNetForMultipleChoiceOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
@ -1040,8 +1039,8 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput):
""" """
loss: tf.Tensor | None = None loss: tf.Tensor | None = None
start_logits: Optional[tf.Tensor] = None start_logits: tf.Tensor | None = None
end_logits: Optional[tf.Tensor] = None end_logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None attentions: tuple[tf.Tensor, ...] | None = None
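The `ModelOutput` dataclasses above keep `field: tf.Tensor | None = None` working below Python 3.10 because these modules use `from __future__ import annotations`: the field annotations are stored as strings and never evaluated at class-creation time. A toy equivalent:

from __future__ import annotations

from dataclasses import dataclass

@dataclass
class ToyOutput:
    # toy stand-in for the TFXLNet*Output dataclasses; under postponed
    # evaluation the `|` annotations stay strings, so this runs on 3.9 too
    loss: float | None = None
    logits: list[float] | None = None

print(ToyOutput(logits=[0.1, 0.9]))  # ToyOutput(loss=None, logits=[0.1, 0.9])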
@ -1189,12 +1188,12 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
input_mask: np.ndarray | tf.Tensor | None = None, input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLNetModelOutput, tuple[tf.Tensor]]: ) -> TFXLNetModelOutput | tuple[tf.Tensor]:
outputs = self.transformer( outputs = self.transformer(
input_ids=input_ids, input_ids=input_ids,
attention_mask=attention_mask, attention_mask=attention_mask,
@ -1297,13 +1296,13 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):
input_mask: np.ndarray | tf.Tensor | None = None, input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLNetLMHeadModelOutput, tuple[tf.Tensor]]: ) -> TFXLNetLMHeadModelOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -1432,13 +1431,13 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
input_mask: np.ndarray | tf.Tensor | None = None, input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLNetForSequenceClassificationOutput, tuple[tf.Tensor]]: ) -> TFXLNetForSequenceClassificationOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1533,13 +1532,13 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
target_mapping: np.ndarray | tf.Tensor | None = None, target_mapping: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLNetForMultipleChoiceOutput, tuple[tf.Tensor]]: ) -> TFXLNetForMultipleChoiceOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*): labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]` Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1647,13 +1646,13 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
input_mask: np.ndarray | tf.Tensor | None = None, input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None, labels: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLNetForTokenClassificationOutput, tuple[tf.Tensor]]: ) -> TFXLNetForTokenClassificationOutput | tuple[tf.Tensor]:
r""" r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`. Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1737,14 +1736,14 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
input_mask: np.ndarray | tf.Tensor | None = None, input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None, head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None, inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None, use_mems: bool | None = None,
output_attentions: Optional[bool] = None, output_attentions: bool | None = None,
output_hidden_states: Optional[bool] = None, output_hidden_states: bool | None = None,
return_dict: Optional[bool] = None, return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None, start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None, end_positions: np.ndarray | tf.Tensor | None = None,
training: bool = False, training: bool = False,
) -> Union[TFXLNetForQuestionAnsweringSimpleOutput, tuple[tf.Tensor]]: ) -> TFXLNetForQuestionAnsweringSimpleOutput | tuple[tf.Tensor]:
r""" r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*): start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss. Labels for position (index) of the start of the labelled span for computing the token classification loss.
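One caveat that applies to the whole migration: with `from __future__ import annotations`, the new spellings are stored as strings, so code that evaluates annotations at runtime (for example `typing.get_type_hints`) still needs Python 3.10+ to resolve the `|` syntax. A minimal illustration:

from __future__ import annotations

import typing

def f(x: int | None = None) -> bool:
    return x is not None

print(f.__annotations__)         # {'x': 'int | None', 'return': 'bool'} -- plain strings
print(typing.get_type_hints(f))  # evaluates them; requires Python 3.10+ for `|`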