Use newer typing notation

Signed-off-by: cyy <cyyever@outlook.com>
cyy 2025-06-20 17:15:06 +08:00
parent 31d30b7224
commit 1d5fd195df
70 changed files with 2176 additions and 2263 deletions
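
The change is mechanical: `Optional[X]` becomes `X | None`, `Union[A, B]` becomes `A | B` (PEP 604), and the now-unused `typing` imports are dropped. Below is a minimal before/after sketch of the pattern; the `pick` helper is hypothetical and only its annotation style mirrors the diff. Many of the touched files already use `from __future__ import annotations` (PEP 563), so the new syntax is stored as strings and does not require Python 3.10 at runtime.

```python
from __future__ import annotations  # PEP 563: annotations are kept as strings,
                                    # so the "|" union syntax works even on Python < 3.10

# Before (removed throughout this commit):
#     from typing import Optional, Union
#     def pick(value: Optional[int] = None, fallback: Union[int, str] = 0) -> Union[int, str]: ...

# After (PEP 604 notation, as used in every hunk below); `pick` is a hypothetical helper.
def pick(value: int | None = None, fallback: int | str = 0) -> int | str:
    """Return `value` unless it is None, in which case fall back to `fallback`."""
    return fallback if value is None else value


print(pick())             # 0
print(pick(5))            # 5
print(pick(None, "n/a"))  # n/a
```

Note that the caveat runs only one way: `X | None` evaluated outside annotations (for example in an `isinstance` check) is a runtime expression and still needs Python 3.10 or newer.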

View File

@ -17,7 +17,7 @@ from __future__ import annotations
import asyncio
from queue import Queue
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING
if TYPE_CHECKING:
@ -206,7 +206,7 @@ class TextIteratorStreamer(TextStreamer):
"""
def __init__(
self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: float | None = None, **decode_kwargs
):
super().__init__(tokenizer, skip_prompt, **decode_kwargs)
self.text_queue = Queue()
@ -284,7 +284,7 @@ class AsyncTextIteratorStreamer(TextStreamer):
"""
def __init__(
self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: float | None = None, **decode_kwargs
):
super().__init__(tokenizer, skip_prompt, **decode_kwargs)
self.text_queue = asyncio.Queue()
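
The file above is the generation streamer module; the only change is the `timeout: float | None` annotation. As a usage note (the checkpoint and prompt below are placeholders, not part of the diff), this is the typical pattern where that timeout matters: it bounds how long the consuming loop waits for the next decoded chunk from the generation thread.

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The new typing notation", return_tensors="pt")
# `timeout` is now annotated `float | None`: wait at most 10 s per chunk; None blocks indefinitely.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=10.0)

# generate() runs in a background thread and feeds decoded text into the streamer's queue.
thread = Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 20})
thread.start()
for chunk in streamer:   # a stalled producer surfaces as queue.Empty after the timeout
    print(chunk, end="", flush=True)
thread.join()
```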

View File

@ -17,7 +17,6 @@ import operator
import os
import re
from functools import partial, reduce
from typing import Optional, Union
import torch
import torch.distributed as dist
@ -93,7 +92,7 @@ def initialize_tensor_parallelism(tp_plan, tp_size=None):
return tp_device, device_map, device_mesh
def _blocks_to_block_sizes(total_size: int, blocks: Union[int, list[int]]) -> list[int]:
def _blocks_to_block_sizes(total_size: int, blocks: int | list[int]) -> list[int]:
"""
Convert block count or proportions to block sizes.
@ -119,7 +118,7 @@ def _blocks_to_block_sizes(total_size: int, blocks: Union[int, list[int]]) -> li
return [single_size] * blocks
def _get_parameter_tp_plan(parameter_name: str, tp_plan: dict[str, str]) -> Optional[str]:
def _get_parameter_tp_plan(parameter_name: str, tp_plan: dict[str, str]) -> str | None:
"""
Get the TP style for a parameter from the TP plan.
@ -366,8 +365,8 @@ class GatherParallel(TensorParallelLayer):
def __init__(
self,
*,
input_layouts: Optional[Placement] = None,
output_layouts: Optional[Placement] = None,
input_layouts: Placement | None = None,
output_layouts: Placement | None = None,
use_local_output: bool = True,
):
super().__init__()
@ -460,8 +459,8 @@ class ColwiseParallel(TensorParallelLayer):
def __init__(
self,
*,
input_layouts: Optional[Placement] = None,
output_layouts: Optional[Placement] = None,
input_layouts: Placement | None = None,
output_layouts: Placement | None = None,
use_local_output: bool = True,
use_dtensor=True,
):
@ -548,8 +547,8 @@ class RowwiseParallel(TensorParallelLayer):
def __init__(
self,
*,
input_layouts: Optional[Placement] = None,
output_layouts: Optional[Placement] = None,
input_layouts: Placement | None = None,
output_layouts: Placement | None = None,
use_local_output: bool = True,
use_dtensor=True,
):
@ -887,7 +886,7 @@ def shard_and_distribute_module(
return param
def verify_tp_plan(expected_keys: list[str], tp_plan: Optional[dict[str, str]]):
def verify_tp_plan(expected_keys: list[str], tp_plan: dict[str, str] | None):
"""
Verify the TP plan of the model and log a warning listing the layers that were not sharded and the rules that were not applied.
"""

View File

@ -16,7 +16,6 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Optional
import tensorflow as tf
@ -44,7 +43,7 @@ class TFBaseModelOutput(ModelOutput):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -64,8 +63,8 @@ class TFBaseModelOutputWithNoAttention(ModelOutput):
Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
"""
last_hidden_state: Optional[tf.Tensor] = None
hidden_states: Optional[tuple[tf.Tensor, ...]] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
@dataclass
@ -96,8 +95,8 @@ class TFBaseModelOutputWithPooling(ModelOutput):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
pooler_output: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -119,9 +118,9 @@ class TFBaseModelOutputWithPoolingAndNoAttention(ModelOutput):
Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
"""
last_hidden_state: Optional[tf.Tensor] = None
pooler_output: Optional[tf.Tensor] = None
hidden_states: Optional[tuple[tf.Tensor, ...]] = None
last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
@dataclass
@ -164,8 +163,8 @@ class TFBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
weighted average in the cross-attention heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
pooler_output: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -202,7 +201,7 @@ class TFBaseModelOutputWithPast(ModelOutput):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -235,7 +234,7 @@ class TFBaseModelOutputWithCrossAttentions(ModelOutput):
weighted average in the cross-attention heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
cross_attentions: tuple[tf.Tensor] | None = None
@ -277,7 +276,7 @@ class TFBaseModelOutputWithPastAndCrossAttentions(ModelOutput):
weighted average in the cross-attention heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -334,7 +333,7 @@ class TFSeq2SeqModelOutput(ModelOutput):
self-attention heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor] | None = None
decoder_attentions: tuple[tf.Tensor] | None = None
@ -368,7 +367,7 @@ class TFCausalLMOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -403,7 +402,7 @@ class TFCausalLMOutputWithPast(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -445,7 +444,7 @@ class TFCausalLMOutputWithCrossAttentions(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -476,7 +475,7 @@ class TFMaskedLMOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -530,7 +529,7 @@ class TFSeq2SeqLMOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor] | None = None
decoder_attentions: tuple[tf.Tensor] | None = None
@ -565,7 +564,7 @@ class TFNextSentencePredictorOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -594,7 +593,7 @@ class TFSequenceClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -645,7 +644,7 @@ class TFSeq2SeqSequenceClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor] | None = None
decoder_attentions: tuple[tf.Tensor] | None = None
@ -687,7 +686,7 @@ class TFSemanticSegmenterOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -719,7 +718,7 @@ class TFSemanticSegmenterOutputWithNoAttention(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
@ -745,7 +744,7 @@ class TFImageClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -776,7 +775,7 @@ class TFMultipleChoiceModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -805,7 +804,7 @@ class TFTokenClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -836,8 +835,8 @@ class TFQuestionAnsweringModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
start_logits: Optional[tf.Tensor] = None
end_logits: Optional[tf.Tensor] = None
start_logits: tf.Tensor | None = None
end_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -887,8 +886,8 @@ class TFSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
start_logits: Optional[tf.Tensor] = None
end_logits: Optional[tf.Tensor] = None
start_logits: tf.Tensor | None = None
end_logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor] | None = None
decoder_attentions: tuple[tf.Tensor] | None = None
@ -927,7 +926,7 @@ class TFSequenceClassifierOutputWithPast(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -950,8 +949,8 @@ class TFImageClassifierOutputWithNoAttention(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
hidden_states: Optional[tuple[tf.Tensor, ...]] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
@dataclass
@ -977,7 +976,7 @@ class TFMaskedImageModelingOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
reconstruction: Optional[tf.Tensor] = None
reconstruction: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None

View File

@ -27,7 +27,7 @@ import re
import warnings
from collections.abc import Mapping
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
from typing import TYPE_CHECKING, Any, Callable, Union
import h5py
import numpy as np
@ -1412,10 +1412,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
dataset: datasets.Dataset, # noqa:F821
batch_size: int = 8,
shuffle: bool = True,
tokenizer: Optional[PreTrainedTokenizerBase] = None,
collate_fn: Optional[Callable] = None,
collate_fn_args: Optional[dict[str, Any]] = None,
drop_remainder: Optional[bool] = None,
tokenizer: PreTrainedTokenizerBase | None = None,
collate_fn: Callable | None = None,
collate_fn_args: dict[str, Any] | None = None,
drop_remainder: bool | None = None,
prefetch: bool = True,
):
"""
@ -1811,14 +1811,14 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
self,
output_dir,
model_name: str,
language: Optional[str] = None,
license: Optional[str] = None,
tags: Optional[str] = None,
finetuned_from: Optional[str] = None,
tasks: Optional[str] = None,
dataset_tags: Optional[Union[str, list[str]]] = None,
dataset: Optional[Union[str, list[str]]] = None,
dataset_args: Optional[Union[str, list[str]]] = None,
language: str | None = None,
license: str | None = None,
tags: str | None = None,
finetuned_from: str | None = None,
tasks: str | None = None,
dataset_tags: str | list[str] | None = None,
dataset: str | list[str] | None = None,
dataset_args: str | list[str] | None = None,
):
"""
Creates a draft of a model card using the information available to the `Trainer`.
@ -1887,7 +1887,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
self.build_in_name_scope()
main_layer.set_input_embeddings(value)
def get_output_embeddings(self) -> Union[None, keras.layers.Layer]:
def get_output_embeddings(self) -> None | keras.layers.Layer:
"""
Returns the model's output embeddings
@ -1924,7 +1924,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
self.build_in_name_scope()
lm_head.set_output_embeddings(value)
def get_output_layer_with_bias(self) -> Union[None, keras.layers.Layer]:
def get_output_layer_with_bias(self) -> None | keras.layers.Layer:
"""
Get the layer that handles a bias attribute in case the model has an LM head with weights tied to the
embeddings
@ -1937,7 +1937,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
)
return self.get_lm_head()
def get_prefix_bias_name(self) -> Union[None, str]:
def get_prefix_bias_name(self) -> None | str:
"""
Get the concatenated _prefix name of the bias from the model name to the parent layer
@ -1947,7 +1947,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return None
def get_bias(self) -> Union[None, dict[str, tf.Variable]]:
def get_bias(self) -> None | dict[str, tf.Variable]:
"""
Dict of bias attached to an LM head. The key represents the name of the bias attribute.
@ -1989,9 +1989,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
"""
return None
def resize_token_embeddings(
self, new_num_tokens: Optional[int] = None
) -> Union[keras.layers.Embedding, tf.Variable]:
def resize_token_embeddings(self, new_num_tokens: int | None = None) -> keras.layers.Embedding | tf.Variable:
"""
Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`.
@ -2022,7 +2020,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
return model_embeds
def _v2_resized_token_embeddings(self, new_num_tokens: Optional[int] = None) -> keras.layers.Embedding:
def _v2_resized_token_embeddings(self, new_num_tokens: int | None = None) -> keras.layers.Embedding:
"""
Resizes input token embeddings matrix of the model if `new_num_tokens != config.vocab_size`.
@ -2346,10 +2344,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
version=1,
push_to_hub=False,
signatures=None,
max_shard_size: Union[int, str] = "5GB",
max_shard_size: int | str = "5GB",
create_pr: bool = False,
safe_serialization: bool = False,
token: Optional[Union[str, bool]] = None,
token: str | bool | None = None,
**kwargs,
):
"""
@ -2525,16 +2523,16 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
@classmethod
def from_pretrained(
cls,
pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
pretrained_model_name_or_path: str | os.PathLike | None,
*model_args,
config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None,
cache_dir: Optional[Union[str, os.PathLike]] = None,
config: PretrainedConfig | str | os.PathLike | None = None,
cache_dir: str | os.PathLike | None = None,
ignore_mismatched_sizes: bool = False,
force_download: bool = False,
local_files_only: bool = False,
token: Optional[Union[str, bool]] = None,
token: str | bool | None = None,
revision: str = "main",
use_safetensors: Optional[bool] = None,
use_safetensors: bool | None = None,
**kwargs,
):
r"""
@ -3121,13 +3119,13 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
def push_to_hub(
self,
repo_id: str,
use_temp_dir: Optional[bool] = None,
commit_message: Optional[str] = None,
private: Optional[bool] = None,
max_shard_size: Optional[Union[int, str]] = "10GB",
token: Optional[Union[bool, str]] = None,
use_temp_dir: bool | None = None,
commit_message: str | None = None,
private: bool | None = None,
max_shard_size: int | str | None = "10GB",
token: bool | str | None = None,
# (`use_auth_token` is deprecated: we have to keep it here as we don't have **kwargs)
use_auth_token: Optional[Union[bool, str]] = None,
use_auth_token: bool | str | None = None,
create_pr: bool = False,
**base_model_card_args,
) -> str:
@ -3314,7 +3312,7 @@ class TFSharedEmbeddings(keras.layers.Layer):
# TODO (joao): flagged for detection due to embeddings refactor
def __init__(self, vocab_size: int, hidden_size: int, initializer_range: Optional[float] = None, **kwargs):
def __init__(self, vocab_size: int, hidden_size: int, initializer_range: float | None = None, **kwargs):
super().__init__(**kwargs)
self.vocab_size = vocab_size
self.hidden_size = hidden_size

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -164,10 +163,10 @@ class TFAlbertEmbeddings(keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0,
training: bool = False,
) -> tf.Tensor:
@ -403,7 +402,7 @@ class TFAlbertLayerGroup(keras.layers.Layer):
output_attentions: bool,
output_hidden_states: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
layer_hidden_states = () if output_hidden_states else None
layer_attentions = () if output_attentions else None
@ -466,7 +465,7 @@ class TFAlbertTransformer(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
hidden_states = self.embedding_hidden_mapping_in(inputs=hidden_states)
all_attentions = () if output_attentions else None
all_hidden_states = (hidden_states,) if output_hidden_states else None
@ -629,11 +628,11 @@ class TFAlbertMainLayer(keras.layers.Layer):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -749,9 +748,9 @@ class TFAlbertForPreTrainingOutput(ModelOutput):
heads.
"""
loss: Optional[tf.Tensor] = None
prediction_logits: Optional[tf.Tensor] = None
sop_logits: Optional[tf.Tensor] = None
loss: tf.Tensor | None = None
prediction_logits: tf.Tensor | None = None
sop_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -879,11 +878,11 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.albert(
input_ids=input_ids,
attention_mask=attention_mask,
@ -942,13 +941,13 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
sentence_order_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFAlbertForPreTrainingOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFAlbertForPreTrainingOutput | tuple[tf.Tensor]:
r"""
Return:
@ -1070,12 +1069,12 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1193,12 +1192,12 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1290,12 +1289,12 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1382,13 +1381,13 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1485,12 +1484,12 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import random
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -103,7 +102,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
return tf.tile(mask[None, None, :, :], (bsz, 1, 1, 1))
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -129,7 +128,7 @@ class TFBartLearnedPositionalEmbedding(keras.layers.Embedding):
def call(
self,
input_shape: Optional[tf.TensorShape] = None,
input_shape: tf.TensorShape | None = None,
past_key_values_length: int = 0,
position_ids: tf.Tensor | None = None,
):
@ -184,7 +183,7 @@ class TFBartAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -334,7 +333,7 @@ class TFBartEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: np.ndarray | tf.Tensor | None,
layer_head_mask: tf.Tensor | None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tf.Tensor:
"""
Args:
@ -427,8 +426,8 @@ class TFBartDecoderLayer(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
training: Optional[bool] = False,
past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -750,7 +749,7 @@ class TFBartEncoder(keras.layers.Layer):
config: BartConfig
"""
def __init__(self, config: BartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: BartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -776,11 +775,11 @@ class TFBartEncoder(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
"""
Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -910,7 +909,7 @@ class TFBartDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: BartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: BartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -938,13 +937,13 @@ class TFBartDecoder(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
r"""
Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -1167,17 +1166,17 @@ class TFBartMainLayer(keras.layers.Layer):
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFSeq2SeqModelOutput, tuple[tf.Tensor]]:
) -> TFSeq2SeqModelOutput | tuple[tf.Tensor]:
# different to other models, Bart automatically creates decoder_input_ids from
# input_ids if no decoder_input_ids are provided
if decoder_input_ids is None and decoder_inputs_embeds is None:
@ -1297,17 +1296,17 @@ class TFBartModel(TFBartPretrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1429,17 +1428,17 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageMode
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
@ -1600,17 +1599,17 @@ class TFBartForSequenceClassification(TFBartPretrainedModel, TFSequenceClassific
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSeq2SeqSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSeq2SeqSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,

View File

@ -20,7 +20,6 @@ from __future__ import annotations
import math
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -161,10 +160,10 @@ class TFBertEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0,
training: bool = False,
) -> tf.Tensor:
@ -589,12 +588,12 @@ class TFBertEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -851,13 +850,13 @@ class TFBertMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@ -1048,10 +1047,10 @@ class TFBertForPreTrainingOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
prediction_logits: Optional[tf.Tensor] = None
seq_relationship_logits: Optional[tf.Tensor] = None
hidden_states: Optional[Union[tuple[tf.Tensor], tf.Tensor]] = None
attentions: Optional[Union[tuple[tf.Tensor], tf.Tensor]] = None
prediction_logits: tf.Tensor | None = None
seq_relationship_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | tf.Tensor | None = None
attentions: tuple[tf.Tensor] | tf.Tensor | None = None
BERT_START_DOCSTRING = r"""
@ -1179,13 +1178,13 @@ class TFBertModel(TFBertPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1273,13 +1272,13 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
next_sentence_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFBertForPreTrainingOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFBertForPreTrainingOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1405,12 +1404,12 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1509,15 +1508,15 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
**kwargs,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1617,12 +1616,12 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
next_sentence_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFNextSentencePredictorOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFNextSentencePredictorOutput | tuple[tf.Tensor]:
r"""
Return:
@ -1731,12 +1730,12 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1820,12 +1819,12 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1946,12 +1945,12 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -2045,13 +2044,13 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import os
import random
import warnings
from typing import Optional, Union
import tensorflow as tf
@ -104,7 +103,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -179,7 +178,7 @@ class TFBlenderbotAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -330,7 +329,7 @@ class TFBlenderbotEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
layer_head_mask: tf.Tensor,
training: Optional[bool] = False,
training: bool | None = False,
):
"""
Args:
@ -425,7 +424,7 @@ class TFBlenderbotDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -687,7 +686,7 @@ class TFBlenderbotEncoder(keras.layers.Layer):
config: BlenderbotConfig
"""
def __init__(self, config: BlenderbotConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: BlenderbotConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -859,7 +858,7 @@ class TFBlenderbotDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: BlenderbotConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: BlenderbotConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -1128,7 +1127,7 @@ class TFBlenderbotMainLayer(keras.layers.Layer):
head_mask=None,
decoder_head_mask=None,
cross_attn_head_mask=None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values=None,
inputs_embeds=None,
decoder_inputs_embeds=None,
@ -1230,7 +1229,7 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel):
return self.model.decoder
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, *model_args, **kwargs):
if pretrained_model_name_or_path == "facebook/blenderbot-90M":
from ..blenderbot_small import TFBlenderbotSmallModel
@ -1262,17 +1261,17 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[tuple[tf.Tensor], TFSeq2SeqModelOutput]:
) -> tuple[tf.Tensor] | TFSeq2SeqModelOutput:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1385,7 +1384,7 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal
self.bias_layer.bias.assign(value["final_logits_bias"])
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, *model_args, **kwargs):
if pretrained_model_name_or_path == "facebook/blenderbot-90M":
from ..blenderbot_small import TFBlenderbotSmallForConditionalGeneration
@ -1414,17 +1413,17 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple[tf.Tensor], TFSeq2SeqLMOutput]:
training: bool | None = False,
) -> tuple[tf.Tensor] | TFSeq2SeqLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import random
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -103,7 +102,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -179,7 +178,7 @@ class TFBlenderbotSmallAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -330,7 +329,7 @@ class TFBlenderbotSmallEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: np.ndarray | tf.Tensor | None,
layer_head_mask: tf.Tensor | None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tf.Tensor:
"""
Args:
@ -424,8 +423,8 @@ class TFBlenderbotSmallDecoderLayer(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
training: Optional[bool] = False,
past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -691,7 +690,7 @@ class TFBlenderbotSmallEncoder(keras.layers.Layer):
config: BlenderbotSmallConfig
"""
def __init__(self, config: BlenderbotSmallConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: BlenderbotSmallConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -863,7 +862,7 @@ class TFBlenderbotSmallDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: BlenderbotSmallConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: BlenderbotSmallConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -1129,7 +1128,7 @@ class TFBlenderbotSmallMainLayer(keras.layers.Layer):
head_mask=None,
decoder_head_mask=None,
cross_attn_head_mask=None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values=None,
inputs_embeds=None,
decoder_inputs_embeds=None,
@ -1247,17 +1246,17 @@ class TFBlenderbotSmallModel(TFBlenderbotSmallPreTrainedModel):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[tuple[tf.Tensor], TFSeq2SeqModelOutput]:
) -> tuple[tf.Tensor] | TFSeq2SeqModelOutput:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1383,17 +1382,17 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None,
encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple[tf.Tensor], TFSeq2SeqLMOutput]:
training: bool | None = False,
) -> tuple[tf.Tensor] | TFSeq2SeqLMOutput:
r"""
labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@ -18,7 +18,7 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Any, Optional, Union
from typing import Any
import tensorflow as tf
@ -96,7 +96,7 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput):
loss: tuple[tf.Tensor] | None = None
logits: tuple[tf.Tensor] | None = None
image_embeds: tf.Tensor | None = None
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -138,7 +138,7 @@ class TFBlipTextVisionModelOutput(ModelOutput):
loss: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -179,7 +179,7 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput):
itm_score: tf.Tensor | None = None
loss: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
vision_pooler_output: tf.Tensor | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -209,10 +209,10 @@ class TFBlipOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits_per_image: Optional[tf.Tensor] = None
logits_per_text: Optional[tf.Tensor] = None
text_embeds: Optional[tf.Tensor] = None
image_embeds: Optional[tf.Tensor] = None
logits_per_image: tf.Tensor | None = None
logits_per_text: tf.Tensor | None = None
text_embeds: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
text_model_output: TFBaseModelOutputWithPooling = None
vision_model_output: TFBaseModelOutputWithPooling = None
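The ModelOutput-style dataclasses follow the same rule: optional tensor fields keep their `= None` defaults and only the annotation spelling changes from `Optional[tf.Tensor]` to `tf.Tensor | None`. A rough, self-contained illustration with a plain dataclass and made-up field names (not the actual TFBlipOutput definition):

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class ToyOutput:
    # Before: loss: Optional[float] = None / logits: Optional[list[float]] = None
    loss: float | None = None
    logits: list[float] | None = None


out = ToyOutput(logits=[0.1, 0.9])
print(out.loss, out.logits)  # None [0.1, 0.9]
```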
@ -309,9 +309,9 @@ class TFBlipTextEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
) -> tf.Tensor:
"""
Applies embedding based on inputs tensor.
@ -367,8 +367,8 @@ class TFBlipAttention(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
training: Optional[bool] = None,
output_attentions: bool | None = False,
training: bool | None = None,
) -> tuple[tf.Tensor, tf.Tensor | None, tuple[tf.Tensor] | None]:
"""Input shape: Batch x Time x Channel"""
@ -470,8 +470,8 @@ class TFBlipEncoderLayer(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
output_attentions: Optional[bool] = False,
training: Optional[bool] = None,
output_attentions: bool | None = False,
training: bool | None = None,
) -> tuple[tf.Tensor]:
"""
Args:
@ -624,11 +624,11 @@ class TFBlipEncoder(keras.layers.Layer):
self,
inputs_embeds,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBaseModelOutput:
r"""
Args:
inputs_embeds (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
@ -723,11 +723,11 @@ class TFBlipVisionModel(TFBlipPreTrainedModel):
def call(
self,
pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBaseModelOutputWithPooling]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBaseModelOutputWithPooling:
r"""
Returns:
@ -861,12 +861,12 @@ class TFBlipMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
return_loss: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBlipOutput]:
return_loss: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBlipOutput:
# Use BLIP model's config for some fields (if specified) instead of those of vision & text components.
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@ -954,12 +954,12 @@ class TFBlipModel(TFBlipPreTrainedModel):
pixel_values: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
return_loss: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBlipOutput]:
return_loss: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBlipOutput:
r"""
Returns:
@ -1003,7 +1003,7 @@ class TFBlipModel(TFBlipPreTrainedModel):
input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
return_dict: bool | None = None,
) -> tf.Tensor:
r"""
Returns:
@ -1039,7 +1039,7 @@ class TFBlipModel(TFBlipPreTrainedModel):
def get_image_features(
self,
pixel_values: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
return_dict: bool | None = None,
) -> tf.Tensor:
r"""
Returns:
@ -1116,12 +1116,12 @@ class TFBlipForConditionalGeneration(TFBlipPreTrainedModel):
pixel_values: tf.Tensor,
input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
labels: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBlipForConditionalGenerationModelOutput]:
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBlipForConditionalGenerationModelOutput:
r"""
Returns:
@ -1333,12 +1333,12 @@ class TFBlipForQuestionAnswering(TFBlipPreTrainedModel):
decoder_input_ids: tf.Tensor | None = None,
decoder_attention_mask: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
labels: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBlipTextVisionModelOutput]:
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBlipTextVisionModelOutput:
r"""
Returns:
@ -1586,13 +1586,13 @@ class TFBlipForImageTextRetrieval(TFBlipPreTrainedModel):
self,
input_ids: tf.Tensor,
pixel_values: tf.Tensor | None = None,
use_itm_head: Optional[bool] = True,
use_itm_head: bool | None = True,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[tuple, TFBlipImageTextMatchingModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = None,
) -> tuple | TFBlipImageTextMatchingModelOutput:
r"""
Returns:

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import math
from typing import Optional
import tensorflow as tf
@ -303,7 +302,7 @@ class TFBlipTextSelfOutput(keras.layers.Layer):
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.config = config
def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: Optional[bool] = None) -> tf.Tensor:
def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: bool | None = None) -> tf.Tensor:
hidden_states = self.dense(inputs=hidden_states)
hidden_states = self.dropout(inputs=hidden_states, training=training)
hidden_states = self.LayerNorm(inputs=hidden_states + input_tensor)
@ -338,8 +337,8 @@ class TFBlipTextAttention(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None = None,
encoder_attention_mask: tf.Tensor | None = None,
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
output_attentions: Optional[bool] = False,
training: Optional[bool] = None,
output_attentions: bool | None = False,
training: bool | None = None,
):
self_outputs = self.self(
hidden_states,

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -695,12 +694,12 @@ class TFCamembertEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
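Return annotations change in the same mechanical way: `Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]` becomes `TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]`, and the method still returns either an output object or a plain tuple depending on `return_dict`. A hedged sketch of that calling convention, using invented names rather than the real layer:

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class DemoOutput:
    last_hidden_state: list[float]


def encode(values: list[float], return_dict: bool | None = None) -> DemoOutput | tuple:
    """Toy stand-in for a `call()` method returning an output object or a tuple."""
    return_dict = True if return_dict is None else return_dict
    hidden = [v * 2 for v in values]
    if not return_dict:
        return (hidden,)
    return DemoOutput(last_hidden_state=hidden)


print(encode([1.0, 2.0], return_dict=False))   # ([2.0, 4.0],)
print(encode([1.0, 2.0]).last_hidden_state)    # [2.0, 4.0]
```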
@ -809,13 +808,13 @@ class TFCamembertMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@ -1008,13 +1007,13 @@ class TFCamembertModel(TFCamembertPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1164,12 +1163,12 @@ class TFCamembertForMaskedLM(TFCamembertPreTrainedModel, TFMaskedLanguageModelin
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1294,12 +1293,12 @@ class TFCamembertForSequenceClassification(TFCamembertPreTrainedModel, TFSequenc
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1390,12 +1389,12 @@ class TFCamembertForTokenClassification(TFCamembertPreTrainedModel, TFTokenClass
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1482,12 +1481,12 @@ class TFCamembertForMultipleChoice(TFCamembertPreTrainedModel, TFMultipleChoiceL
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1586,13 +1585,13 @@ class TFCamembertForQuestionAnswering(TFCamembertPreTrainedModel, TFQuestionAnsw
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1706,14 +1705,14 @@ class TFCamembertForCausalLM(TFCamembertPreTrainedModel, TFCausalLanguageModelin
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if

View File

@ -18,7 +18,7 @@ from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Any, Optional, Union
from typing import Any
import numpy as np
import tensorflow as tf
@ -55,7 +55,7 @@ LARGE_NEGATIVE = -1e8
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -108,10 +108,10 @@ class TFCLIPOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits_per_image: Optional[tf.Tensor] = None
logits_per_text: Optional[tf.Tensor] = None
text_embeds: Optional[tf.Tensor] = None
image_embeds: Optional[tf.Tensor] = None
logits_per_image: tf.Tensor | None = None
logits_per_text: tf.Tensor | None = None
text_embeds: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
text_model_output: TFBaseModelOutputWithPooling = None
vision_model_output: TFBaseModelOutputWithPooling = None
@ -225,9 +225,9 @@ class TFCLIPTextEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
) -> tf.Tensor:
"""
Applies embedding based on inputs tensor.
@ -498,7 +498,7 @@ class TFCLIPEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@ -560,7 +560,7 @@ class TFCLIPTextTransformer(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
input_shape = shape_list(input_ids)
embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids)
@ -677,11 +677,11 @@ class TFCLIPTextMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if input_ids is None:
raise ValueError("You have to specify input_ids")
@ -728,7 +728,7 @@ class TFCLIPVisionTransformer(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
embedding_output = self.embeddings(pixel_values=pixel_values)
embedding_output = self.pre_layernorm(inputs=embedding_output)
@ -790,11 +790,11 @@ class TFCLIPVisionMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
@ -892,9 +892,9 @@ class TFCLIPMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
if input_ids is None:
@ -924,9 +924,9 @@ class TFCLIPMainLayer(keras.layers.Layer):
def get_image_features(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
if pixel_values is None:
@ -952,12 +952,12 @@ class TFCLIPMainLayer(keras.layers.Layer):
pixel_values: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
return_loss: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
return_loss: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFCLIPOutput, tuple[tf.Tensor]]:
) -> TFCLIPOutput | tuple[tf.Tensor]:
if input_ids is None:
raise ValueError("You have to specify either input_ids")
if pixel_values is None:
@ -1191,11 +1191,11 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r"""
Returns:
@ -1250,11 +1250,11 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r"""
Returns:
@ -1313,9 +1313,9 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
r"""
@ -1351,9 +1351,9 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
def get_image_features(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
r"""
@ -1397,12 +1397,12 @@ class TFCLIPModel(TFCLIPPreTrainedModel):
pixel_values: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
return_loss: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
return_loss: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFCLIPOutput, tuple[tf.Tensor]]:
) -> TFCLIPOutput | tuple[tf.Tensor]:
r"""
Returns:

View File

@ -16,8 +16,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -106,10 +104,10 @@ class TFConvBertEmbeddings(keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0,
training: bool = False,
) -> tf.Tensor:
@ -860,16 +858,16 @@ class TFConvBertModel(TFConvBertPreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
token_type_ids: Optional[Union[np.array, tf.Tensor]] = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None,
attention_mask: np.array | tf.Tensor | None = None,
token_type_ids: np.array | tf.Tensor | None = None,
position_ids: np.array | tf.Tensor | None = None,
head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.convbert(
input_ids=input_ids,
attention_mask=attention_mask,
@ -995,12 +993,12 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFMaskedLMOutput]:
training: bool | None = False,
) -> tuple | TFMaskedLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1120,12 +1118,12 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSequenceClassifierOutput]:
training: bool | None = False,
) -> tuple | TFSequenceClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1208,12 +1206,12 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFMultipleChoiceModelOutput]:
training: bool | None = False,
) -> tuple | TFMultipleChoiceModelOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1316,12 +1314,12 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFTokenClassifierOutput]:
training: bool | None = False,
) -> tuple | TFTokenClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1399,13 +1397,13 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: tf.Tensor | None = None,
end_positions: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFQuestionAnsweringModelOutput]:
training: bool | None = False,
) -> tuple | TFQuestionAnsweringModelOutput:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -16,8 +16,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -238,7 +236,7 @@ class TFConvNextStage(keras.layers.Layer):
kernel_size: int = 2,
stride: int = 2,
depth: int = 2,
drop_path_rates: Optional[list[float]] = None,
drop_path_rates: list[float] | None = None,
**kwargs,
):
super().__init__(**kwargs)
@ -365,10 +363,10 @@ class TFConvNextMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
@ -503,10 +501,10 @@ class TFConvNextModel(TFConvNextPreTrainedModel):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r"""
Returns:
@ -589,11 +587,11 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,

View File

@ -16,8 +16,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -279,7 +277,7 @@ class TFConvNextV2Stage(keras.layers.Layer):
kernel_size: int = 2,
stride: int = 2,
depth: int = 2,
drop_path_rates: Optional[list[float]] = None,
drop_path_rates: list[float] | None = None,
**kwargs,
):
super().__init__(**kwargs)
@ -367,9 +365,9 @@ class TFConvNextV2Encoder(keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
) -> Union[tuple, TFBaseModelOutputWithNoAttention]:
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
) -> tuple | TFBaseModelOutputWithNoAttention:
all_hidden_states = () if output_hidden_states else None
for i, layer_module in enumerate(self.stages):
@ -411,10 +409,10 @@ class TFConvNextV2MainLayer(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
@ -557,10 +555,10 @@ class TFConvNextV2Model(TFConvNextV2PreTrainedModel):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndNoAttention, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndNoAttention | tuple[tf.Tensor]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
@ -627,11 +625,11 @@ class TFConvNextV2ForImageClassification(TFConvNextV2PreTrainedModel, TFSequence
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFImageClassifierOutputWithNoAttention, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFImageClassifierOutputWithNoAttention | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,

View File

@ -17,8 +17,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -303,18 +301,18 @@ class TFCTRLMainLayer(keras.layers.Layer):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPast]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPast:
# If using past key value states, only the last tokens
# should be given as an input
if past_key_values is not None:
@ -594,18 +592,18 @@ class TFCTRLModel(TFCTRLPreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPast]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPast:
outputs = self.transformer(
input_ids=input_ids,
past_key_values=past_key_values,
@ -722,19 +720,19 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFCausalLMOutputWithPast]:
training: bool | None = False,
) -> tuple | TFCausalLMOutputWithPast:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -835,19 +833,19 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSequenceClassifierOutput]:
training: bool | None = False,
) -> tuple | TFSequenceClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import collections.abc
from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf
@ -65,8 +64,8 @@ class TFBaseModelOutputWithCLSToken(ModelOutput):
the initial embedding outputs.
"""
last_hidden_state: Optional[tf.Tensor] = None
cls_token_value: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
cls_token_value: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
@ -766,10 +765,10 @@ class TFCvtEncoder(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithCLSToken, tuple[tf.Tensor]]:
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
training: bool | None = False,
) -> TFBaseModelOutputWithCLSToken | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
hidden_state = pixel_values
# When running on CPU, `keras.layers.Conv2D` doesn't support (batch_size, num_channels, height, width)
@ -821,10 +820,10 @@ class TFCvtMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithCLSToken, tuple[tf.Tensor]]:
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithCLSToken | tuple[tf.Tensor]:
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
@ -929,10 +928,10 @@ class TFCvtModel(TFCvtPreTrainedModel):
def call(
self,
pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithCLSToken, tuple[tf.Tensor]]:
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithCLSToken | tuple[tf.Tensor]:
r"""
Returns:
@ -1015,10 +1014,10 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification
self,
pixel_values: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFImageClassifierOutputWithNoAttention, tuple[tf.Tensor]]:
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFImageClassifierOutputWithNoAttention | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import collections.abc
import math
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -90,8 +89,8 @@ class TFData2VecVisionModelOutputWithPooling(TFBaseModelOutputWithPooling):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
pooler_output: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -258,7 +257,7 @@ class TFData2VecVisionPatchEmbeddings(keras.layers.Layer):
class TFData2VecVisionSelfAttention(keras.layers.Layer):
def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, **kwargs):
def __init__(self, config: Data2VecVisionConfig, window_size: tuple | None = None, **kwargs):
super().__init__(**kwargs)
if config.hidden_size % config.num_attention_heads != 0:
@ -306,7 +305,7 @@ class TFData2VecVisionSelfAttention(keras.layers.Layer):
hidden_states: tf.Tensor,
head_mask: tf.Tensor,
output_attentions: bool,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
relative_position_bias: TFData2VecVisionRelativePositionBias | None = None,
training: bool = False,
) -> tuple[tf.Tensor]:
batch_size = shape_list(hidden_states)[0]
@ -402,7 +401,7 @@ class TFData2VecVisionSelfOutput(keras.layers.Layer):
class TFData2VecVisionAttention(keras.layers.Layer):
def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, **kwargs):
def __init__(self, config: Data2VecVisionConfig, window_size: tuple | None = None, **kwargs):
super().__init__(**kwargs)
self.attention = TFData2VecVisionSelfAttention(config, window_size=window_size, name="attention")
@ -416,7 +415,7 @@ class TFData2VecVisionAttention(keras.layers.Layer):
input_tensor: tf.Tensor,
head_mask: tf.Tensor,
output_attentions: bool,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
relative_position_bias: TFData2VecVisionRelativePositionBias | None = None,
training: bool = False,
) -> tuple[tf.Tensor]:
self_outputs = self.attention(
@ -504,7 +503,7 @@ class TFData2VecVisionLayer(keras.layers.Layer):
"""This corresponds to the Block class in the timm implementation."""
def __init__(
self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, drop_path_rate: float = 0.0, **kwargs
self, config: Data2VecVisionConfig, window_size: tuple | None = None, drop_path_rate: float = 0.0, **kwargs
):
super().__init__(**kwargs)
self.config = config
@ -570,7 +569,7 @@ class TFData2VecVisionLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
head_mask: tf.Tensor,
output_attentions: bool,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
relative_position_bias: TFData2VecVisionRelativePositionBias | None = None,
training: bool = False,
) -> tuple[tf.Tensor]:
self_attention_outputs = self.attention(
@ -667,7 +666,7 @@ class TFData2VecVisionRelativePositionBias(keras.layers.Layer):
class TFData2VecVisionEncoder(keras.layers.Layer):
def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = None, **kwargs):
def __init__(self, config: Data2VecVisionConfig, window_size: tuple | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
if config.use_shared_relative_position_bias:
@ -696,7 +695,7 @@ class TFData2VecVisionEncoder(keras.layers.Layer):
output_attentions: bool = False,
output_hidden_states: bool = False,
return_dict: bool = True,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@ -783,11 +782,11 @@ class TFData2VecVisionMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFData2VecVisionModelOutputWithPooling]:
) -> tuple | TFData2VecVisionModelOutputWithPooling:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -995,11 +994,11 @@ class TFData2VecVisionModel(TFData2VecVisionPreTrainedModel):
pixel_values: TFModelInputType | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFData2VecVisionModelOutputWithPooling]:
) -> tuple | TFData2VecVisionModelOutputWithPooling:
r"""
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1059,12 +1058,12 @@ class TFData2VecVisionForImageClassification(TFData2VecVisionPreTrainedModel, TF
self,
pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
@ -1121,10 +1120,10 @@ class TFData2VecVisionConvModule(keras.layers.Layer):
self,
in_channels: int,
out_channels: int,
kernel_size: Union[int, tuple[int, int]],
kernel_size: int | tuple[int, int],
padding: str = "valid",
bias: bool = False,
dilation: Union[int, tuple[int, int]] = 1,
dilation: int | tuple[int, int] = 1,
**kwargs,
) -> None:
super().__init__(**kwargs)
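Non-optional unions convert the same way: `Union[int, tuple[int, int]]` becomes `int | tuple[int, int]` for parameters such as `kernel_size` and `dilation`. A small sketch of how a parameter like that is typically normalized; `to_2tuple` is a generic helper written for this example, not the module's own code:

```python
from __future__ import annotations


def to_2tuple(value: int | tuple[int, int]) -> tuple[int, int]:
    """Accept a single int or an explicit (height, width) pair and always return a pair."""
    if isinstance(value, tuple):
        return value
    return (value, value)


print(to_2tuple(3))       # (3, 3)
print(to_2tuple((3, 5)))  # (3, 5)
```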
@ -1462,7 +1461,7 @@ class TFData2VecVisionFCNHead(keras.layers.Layer):
config: Data2VecVisionConfig,
in_index: int = 2,
kernel_size: int = 3,
dilation: Union[int, tuple[int, int]] = 1,
dilation: int | tuple[int, int] = 1,
**kwargs,
) -> None:
super().__init__(**kwargs)
@ -1599,10 +1598,10 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, TFSemanticSegmenterOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
) -> tuple | TFSemanticSegmenterOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import math
from collections.abc import Sequence
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -208,9 +207,9 @@ class TFDebertaAttention(keras.layers.Layer):
self,
input_tensor: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
rel_embeddings: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False,
training: bool = False,
) -> tuple[tf.Tensor]:
@ -319,9 +318,9 @@ class TFDebertaLayer(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
rel_embeddings: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False,
training: bool = False,
) -> tuple[tf.Tensor]:
@ -409,13 +408,13 @@ class TFDebertaEncoder(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
return_dict: bool = True,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@ -651,9 +650,9 @@ class TFDebertaDisentangledSelfAttention(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
rel_embeddings: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False,
training: bool = False,
) -> tuple[tf.Tensor]:
@ -881,11 +880,11 @@ class TFDebertaEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
mask: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
mask: tf.Tensor | None = None,
training: bool = False,
) -> tf.Tensor:
"""
@ -1074,11 +1073,11 @@ class TFDebertaMainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -1255,11 +1254,11 @@ class TFDebertaModel(TFDebertaPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.deberta(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1314,12 +1313,12 @@ class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLos
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1404,12 +1403,12 @@ class TFDebertaForSequenceClassification(TFDebertaPreTrainedModel, TFSequenceCla
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1497,12 +1496,12 @@ class TFDebertaForTokenClassification(TFDebertaPreTrainedModel, TFTokenClassific
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1579,13 +1578,13 @@ class TFDebertaForQuestionAnswering(TFDebertaPreTrainedModel, TFQuestionAnswerin
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -16,8 +16,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -192,9 +190,9 @@ class TFDebertaV2Attention(keras.layers.Layer):
self,
input_tensor: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
rel_embeddings: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False,
training: bool = False,
) -> tuple[tf.Tensor]:
@ -306,9 +304,9 @@ class TFDebertaV2Layer(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
rel_embeddings: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False,
training: bool = False,
) -> tuple[tf.Tensor]:
@ -485,13 +483,13 @@ class TFDebertaV2Encoder(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
return_dict: bool = True,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
if len(shape_list(attention_mask)) <= 2:
input_mask = attention_mask
else:
@ -718,9 +716,9 @@ class TFDebertaV2DisentangledSelfAttention(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
query_states: Optional[tf.Tensor] = None,
relative_pos: Optional[tf.Tensor] = None,
rel_embeddings: Optional[tf.Tensor] = None,
query_states: tf.Tensor | None = None,
relative_pos: tf.Tensor | None = None,
rel_embeddings: tf.Tensor | None = None,
output_attentions: bool = False,
training: bool = False,
) -> tuple[tf.Tensor]:
@ -985,11 +983,11 @@ class TFDebertaV2Embeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
mask: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
mask: tf.Tensor | None = None,
training: bool = False,
) -> tf.Tensor:
"""
@ -1181,11 +1179,11 @@ class TFDebertaV2MainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -1364,11 +1362,11 @@ class TFDebertaV2Model(TFDebertaV2PreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.deberta(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1424,12 +1422,12 @@ class TFDebertaV2ForMaskedLM(TFDebertaV2PreTrainedModel, TFMaskedLanguageModelin
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1515,12 +1513,12 @@ class TFDebertaV2ForSequenceClassification(TFDebertaV2PreTrainedModel, TFSequenc
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1609,12 +1607,12 @@ class TFDebertaV2ForTokenClassification(TFDebertaV2PreTrainedModel, TFTokenClass
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1692,13 +1690,13 @@ class TFDebertaV2ForQuestionAnswering(TFDebertaV2PreTrainedModel, TFQuestionAnsw
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1793,12 +1791,12 @@ class TFDebertaV2ForMultipleChoice(TFDebertaV2PreTrainedModel, TFMultipleChoiceL
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import collections.abc
import math
from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf
@ -88,9 +87,9 @@ class TFDeiTForImageClassificationWithTeacherOutput(ModelOutput):
the self-attention heads.
"""
logits: Optional[tf.Tensor] = None
cls_logits: Optional[tf.Tensor] = None
distillation_logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
cls_logits: tf.Tensor | None = None
distillation_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -550,7 +549,7 @@ class TFDeiTEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@ -630,12 +629,12 @@ class TFDeiTMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
interpolate_pos_encoding: bool = False,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor, ...]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor, ...]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -780,12 +779,12 @@ class TFDeiTModel(TFDeiTPreTrainedModel):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
interpolate_pos_encoding: bool = False,
training: bool = False,
) -> Union[tuple, TFBaseModelOutputWithPooling]:
) -> tuple | TFBaseModelOutputWithPooling:
outputs = self.deit(
pixel_values=pixel_values,
bool_masked_pos=bool_masked_pos,
@ -910,12 +909,12 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
interpolate_pos_encoding: bool = False,
training: bool = False,
) -> Union[tuple, TFMaskedImageModelingOutput]:
) -> tuple | TFMaskedImageModelingOutput:
r"""
bool_masked_pos (`tf.Tensor` of type bool and shape `(batch_size, num_patches)`):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1046,12 +1045,12 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
interpolate_pos_encoding: bool = False,
training: bool = False,
) -> Union[tf.Tensor, TFImageClassifierOutput]:
) -> tf.Tensor | TFImageClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
@ -1171,12 +1170,12 @@ class TFDeiTForImageClassificationWithTeacher(TFDeiTPreTrainedModel):
self,
pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
interpolate_pos_encoding: bool = False,
training: bool = False,
) -> Union[tuple, TFDeiTForImageClassificationWithTeacherOutput]:
) -> tuple | TFDeiTForImageClassificationWithTeacherOutput:
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
outputs = self.deit(

View File

@ -20,7 +20,6 @@ TF 2.0 Transformer XL model.
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -543,9 +542,9 @@ class TFTransfoXLMainLayer(keras.layers.Layer):
mems: list[tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
):
@ -690,7 +689,7 @@ class TFTransfoXLModelOutput(ModelOutput):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
mems: list[tf.Tensor] = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -723,7 +722,7 @@ class TFTransfoXLLMHeadModelOutput(ModelOutput):
heads.
"""
prediction_scores: Optional[tf.Tensor] = None
prediction_scores: tf.Tensor | None = None
mems: list[tf.Tensor] = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -757,7 +756,7 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mems: list[tf.Tensor] = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -1047,12 +1046,12 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc
mems: list[tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFTransfoXLSequenceClassifierOutputWithPast]:
training: bool | None = False,
) -> tuple | TFTransfoXLSequenceClassifierOutputWithPast:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,

View File

@ -19,7 +19,6 @@ TF 2.0 DistilBERT model
from __future__ import annotations
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -594,11 +593,11 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.distilbert(
input_ids=input_ids,
attention_mask=attention_mask,
@ -697,12 +696,12 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -794,12 +793,12 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -882,12 +881,12 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -969,12 +968,12 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1071,13 +1070,13 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -18,7 +18,6 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf
@ -68,7 +67,7 @@ class TFDPRContextEncoderOutput(ModelOutput):
heads.
"""
pooler_output: Optional[tf.Tensor] = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -96,7 +95,7 @@ class TFDPRQuestionEncoderOutput(ModelOutput):
heads.
"""
pooler_output: Optional[tf.Tensor] = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -127,9 +126,9 @@ class TFDPRReaderOutput(ModelOutput):
heads.
"""
start_logits: Optional[tf.Tensor] = None
end_logits: Optional[tf.Tensor] = None
relevance_logits: Optional[tf.Tensor] = None
start_logits: tf.Tensor | None = None
end_logits: tf.Tensor | None = None
relevance_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -155,15 +154,15 @@ class TFDPREncoderLayer(keras.layers.Layer):
@unpack_inputs
def call(
self,
input_ids: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor, ...]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor, ...]:
outputs = self.bert_model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -226,14 +225,14 @@ class TFDPRSpanPredictorLayer(keras.layers.Layer):
@unpack_inputs
def call(
self,
input_ids: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
return_dict: bool = False,
training: bool = False,
) -> Union[TFDPRReaderOutput, tuple[tf.Tensor, ...]]:
) -> TFDPRReaderOutput | tuple[tf.Tensor, ...]:
# notations: N - number of questions in a batch, M - number of passages per question, L - sequence length
n_passages, sequence_length = shape_list(input_ids) if input_ids is not None else shape_list(inputs_embeds)[:2]
# feed encoder
@ -296,7 +295,7 @@ class TFDPRSpanPredictor(TFPreTrainedModel):
@unpack_inputs
def call(
self,
input_ids: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
@ -304,7 +303,7 @@ class TFDPRSpanPredictor(TFPreTrainedModel):
output_hidden_states: bool = False,
return_dict: bool = False,
training: bool = False,
) -> Union[TFDPRReaderOutput, tuple[tf.Tensor, ...]]:
) -> TFDPRReaderOutput | tuple[tf.Tensor, ...]:
outputs = self.encoder(
input_ids=input_ids,
attention_mask=attention_mask,
@ -329,7 +328,7 @@ class TFDPREncoder(TFPreTrainedModel):
@unpack_inputs
def call(
self,
input_ids: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
@ -337,7 +336,7 @@ class TFDPREncoder(TFPreTrainedModel):
output_hidden_states: bool = False,
return_dict: bool = False,
training: bool = False,
) -> Union[TFDPRReaderOutput, tuple[tf.Tensor, ...]]:
) -> TFDPRReaderOutput | tuple[tf.Tensor, ...]:
outputs = self.encoder(
input_ids=input_ids,
attention_mask=attention_mask,

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -461,12 +460,12 @@ class TFElectraEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -601,10 +600,10 @@ class TFElectraEmbeddings(keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0,
training: bool = False,
) -> tf.Tensor:
@ -806,13 +805,13 @@ class TFElectraMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@ -931,7 +930,7 @@ class TFElectraForPreTrainingOutput(ModelOutput):
heads.
"""
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -1057,13 +1056,13 @@ class TFElectraModel(TFElectraPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1139,11 +1138,11 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFElectraForPreTrainingOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFElectraForPreTrainingOutput | tuple[tf.Tensor]:
r"""
Returns:
@ -1281,12 +1280,12 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1410,12 +1409,12 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1496,12 +1495,12 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss)
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1607,12 +1606,12 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1695,13 +1694,13 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import inspect
import re
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -204,9 +203,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
def __init__(
self,
config: Optional[PretrainedConfig] = None,
encoder: Optional[TFPreTrainedModel] = None,
decoder: Optional[TFPreTrainedModel] = None,
config: PretrainedConfig | None = None,
encoder: TFPreTrainedModel | None = None,
decoder: TFPreTrainedModel | None = None,
):
if config is None and (encoder is None or decoder is None):
raise ValueError("Either a configuration or an encoder and a decoder has to be provided.")
@ -311,8 +310,8 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
encoder_pretrained_model_name_or_path: str | None = None,
decoder_pretrained_model_name_or_path: str | None = None,
*model_args,
**kwargs,
) -> TFPreTrainedModel:
@ -465,13 +464,13 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]:
) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
r"""
Returns:

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import os
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -331,7 +330,7 @@ class TFEsmSelfAttention(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None = None,
encoder_attention_mask: tf.Tensor | None = None,
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
) -> tuple[tf.Tensor]:
mixed_query_layer = self.query(hidden_states)
@ -934,13 +933,13 @@ class TFEsmMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@ -1117,13 +1116,13 @@ class TFEsmModel(TFEsmPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1222,11 +1221,11 @@ class TFEsmForMaskedLM(TFEsmPreTrainedModel, TFMaskedLanguageModelingLoss):
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1370,11 +1369,11 @@ class TFEsmForSequenceClassification(TFEsmPreTrainedModel, TFSequenceClassificat
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1457,11 +1456,11 @@ class TFEsmForTokenClassification(TFEsmPreTrainedModel, TFTokenClassificationLos
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

View File

@ -17,7 +17,7 @@ from __future__ import annotations
from collections.abc import Sequence
from functools import cache
from typing import Any, Callable, Optional
from typing import Any, Callable
import numpy as np
import torch
@ -78,8 +78,8 @@ def rot_vec_mul(r: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
@cache
def identity_rot_mats(
batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
device: Optional[torch.device] = None,
dtype: torch.dtype | None = None,
device: torch.device | None = None,
requires_grad: bool = True,
) -> torch.Tensor:
rots = torch.eye(3, dtype=dtype, device=device, requires_grad=requires_grad)
@ -93,8 +93,8 @@ def identity_rot_mats(
@cache
def identity_trans(
batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
device: Optional[torch.device] = None,
dtype: torch.dtype | None = None,
device: torch.device | None = None,
requires_grad: bool = True,
) -> torch.Tensor:
trans = torch.zeros((*batch_dims, 3), dtype=dtype, device=device, requires_grad=requires_grad)
@ -104,8 +104,8 @@ def identity_trans(
@cache
def identity_quats(
batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
device: Optional[torch.device] = None,
dtype: torch.dtype | None = None,
device: torch.device | None = None,
requires_grad: bool = True,
) -> torch.Tensor:
quat = torch.zeros((*batch_dims, 4), dtype=dtype, device=device, requires_grad=requires_grad)
@ -260,8 +260,8 @@ class Rotation:
def __init__(
self,
rot_mats: Optional[torch.Tensor] = None,
quats: Optional[torch.Tensor] = None,
rot_mats: torch.Tensor | None = None,
quats: torch.Tensor | None = None,
normalize_quats: bool = True,
):
"""
@ -295,8 +295,8 @@ class Rotation:
@staticmethod
def identity(
shape,
dtype: Optional[torch.dtype] = None,
device: Optional[torch.device] = None,
dtype: torch.dtype | None = None,
device: torch.device | None = None,
requires_grad: bool = True,
fmt: str = "quat",
) -> Rotation:
@ -682,7 +682,7 @@ class Rotation:
else:
raise ValueError("Both rotations are None")
def to(self, device: Optional[torch.device], dtype: Optional[torch.dtype]) -> Rotation:
def to(self, device: torch.device | None, dtype: torch.dtype | None) -> Rotation:
"""
Analogous to the to() method of torch Tensors
@ -734,7 +734,7 @@ class Rigid:
dimensions of its component parts.
"""
def __init__(self, rots: Optional[Rotation], trans: Optional[torch.Tensor]):
def __init__(self, rots: Rotation | None, trans: torch.Tensor | None):
"""
Args:
rots: A [*, 3, 3] rotation tensor
@ -786,8 +786,8 @@ class Rigid:
@staticmethod
def identity(
shape: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
device: Optional[torch.device] = None,
dtype: torch.dtype | None = None,
device: torch.device | None = None,
requires_grad: bool = True,
fmt: str = "quat",
) -> Rigid:

View File

@ -22,7 +22,6 @@ import itertools
import random
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -261,14 +260,14 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutput:
outputs = self.transformer(
input_ids=input_ids,
attention_mask=attention_mask,
@ -544,14 +543,14 @@ class TFFlaubertMainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutput:
# removed: src_enc=None, src_len=None
if input_ids is not None and inputs_embeds is not None:
@ -808,7 +807,7 @@ class TFFlaubertWithLMHeadModelOutput(ModelOutput):
heads.
"""
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -864,14 +863,14 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFFlaubertWithLMHeadModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFFlaubertWithLMHeadModelOutput:
transformer_outputs = self.transformer(
input_ids=input_ids,
attention_mask=attention_mask,
@ -940,15 +939,15 @@ class TFFlaubertForSequenceClassification(TFFlaubertPreTrainedModel, TFSequenceC
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1031,16 +1030,16 @@ class TFFlaubertForQuestionAnsweringSimple(TFFlaubertPreTrainedModel, TFQuestion
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1138,15 +1137,15 @@ class TFFlaubertForTokenClassification(TFFlaubertPreTrainedModel, TFTokenClassif
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1251,15 +1250,15 @@ class TFFlaubertForMultipleChoice(TFFlaubertPreTrainedModel, TFMultipleChoiceLos
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
if input_ids is not None:
num_choices = shape_list(input_ids)[1]
seq_length = shape_list(input_ids)[2]

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -1104,7 +1103,7 @@ class TFFunnelForPreTrainingOutput(ModelOutput):
heads.
"""
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -1224,11 +1223,11 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFBaseModelOutput]:
) -> tuple[tf.Tensor] | TFBaseModelOutput:
return self.funnel(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1280,11 +1279,11 @@ class TFFunnelModel(TFFunnelPreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFBaseModelOutput]:
) -> tuple[tf.Tensor] | TFBaseModelOutput:
return self.funnel(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1336,12 +1335,12 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
) -> Union[tuple[tf.Tensor], TFFunnelForPreTrainingOutput]:
) -> tuple[tf.Tensor] | TFFunnelForPreTrainingOutput:
r"""
Returns:
@ -1426,12 +1425,12 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFMaskedLMOutput]:
) -> tuple[tf.Tensor] | TFMaskedLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1509,12 +1508,12 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFSequenceClassifierOutput]:
) -> tuple[tf.Tensor] | TFSequenceClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1598,12 +1597,12 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss):
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFMultipleChoiceModelOutput]:
) -> tuple[tf.Tensor] | TFMultipleChoiceModelOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1705,12 +1704,12 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFTokenClassifierOutput]:
) -> tuple[tf.Tensor] | TFTokenClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1793,13 +1792,13 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFQuestionAnsweringModelOutput]:
) -> tuple[tf.Tensor] | TFQuestionAnsweringModelOutput:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
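
Note on the pattern repeated across the files above: every module shown keeps `from __future__ import annotations`, so annotations are stored as strings and are not evaluated at import time, which means `Optional[X]` can be spelled `X | None` and `Union[A, B]` as `A | B` (PEP 604) without changing import-time behaviour on older Python versions. A minimal before/after sketch, assuming TensorFlow is installed; `call_sketch` is a hypothetical name, not part of the diff:

from __future__ import annotations

import tensorflow as tf


# Old spelling, kept in a comment so this sketch needs no typing import:
# def call(self, mask: Optional[tf.Tensor] = None) -> Union[tuple, dict]: ...

# New spelling with PEP 604 unions; under the __future__ import the annotation
# is kept as a string, so this also runs on Python versions before 3.10.
def call_sketch(mask: tf.Tensor | None = None) -> tuple | dict:
    # Trivial body; only the signature matters for the illustration.
    return (mask,) if mask is not None else {}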

View File

@ -18,7 +18,6 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -396,7 +395,7 @@ class TFGPT2MainLayer(keras.layers.Layer):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
@ -404,12 +403,12 @@ class TFGPT2MainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -628,8 +627,8 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput):
heads.
"""
logits: Optional[tf.Tensor] = None
mc_logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mc_logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -764,7 +763,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
@ -772,12 +771,12 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -880,7 +879,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
@ -888,13 +887,13 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -991,19 +990,19 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
mc_token_ids: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFGPT2DoubleHeadsModelOutput, tuple[tf.Tensor]]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFGPT2DoubleHeadsModelOutput | tuple[tf.Tensor]:
r"""
mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
@ -1145,19 +1144,19 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutputWithPast, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutputWithPast | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,

View File

@ -16,8 +16,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -203,7 +201,7 @@ class TFGPTJAttention(keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
layer_past: Optional[tuple[tf.Tensor, tf.Tensor]] = None,
layer_past: tuple[tf.Tensor, tf.Tensor] | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
@ -428,7 +426,7 @@ class TFGPTJMainLayer(keras.layers.Layer):
output_hidden_states=None,
return_dict=None,
training=False,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -694,18 +692,18 @@ class TFGPTJModel(TFGPTJPreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
r"""
use_cache (`bool`, *optional*, defaults to `True`):
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
@ -794,19 +792,19 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithPast, tuple[tf.Tensor]]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFCausalLMOutputWithPast | tuple[tf.Tensor]:
r"""
labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, input_ids_length)`, *optional*):
Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
@ -902,19 +900,19 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutputWithPast, tuple[tf.Tensor]]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFSequenceClassifierOutputWithPast | tuple[tf.Tensor]:
r"""
labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1019,7 +1017,7 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss)
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
@ -1027,11 +1025,11 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss)
inputs_embeds: np.ndarray | tf.Tensor | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`np.ndarray` or `tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -19,7 +19,7 @@ from __future__ import annotations
import collections.abc
import math
from dataclasses import dataclass
from typing import Any, Optional, Union
from typing import Any
import numpy as np
import tensorflow as tf
@ -79,7 +79,7 @@ LARGE_NEGATIVE = -1e8
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -253,11 +253,11 @@ class TFGroupViTModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits_per_image: Optional[tf.Tensor] = None
logits_per_text: Optional[tf.Tensor] = None
segmentation_logits: Optional[tf.Tensor] = None
text_embeds: Optional[tf.Tensor] = None
image_embeds: Optional[tf.Tensor] = None
logits_per_image: tf.Tensor | None = None
logits_per_text: tf.Tensor | None = None
segmentation_logits: tf.Tensor | None = None
text_embeds: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
text_model_output: TFBaseModelOutputWithPooling = None
vision_model_output: TFBaseModelOutputWithPooling = None
@ -646,9 +646,9 @@ class TFGroupViTTextEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
) -> tf.Tensor:
"""
Applies embedding based on inputs tensor.
@ -809,9 +809,9 @@ class TFGroupViTMLP(keras.layers.Layer):
def __init__(
self,
config: GroupViTVisionConfig,
hidden_size: Optional[int] = None,
intermediate_size: Optional[int] = None,
output_size: Optional[int] = None,
hidden_size: int | None = None,
intermediate_size: int | None = None,
output_size: int | None = None,
**kwargs,
):
super().__init__(**kwargs)
@ -898,10 +898,10 @@ class TFGroupViTAttention(keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
attention_mask: Optional[tf.Tensor] = None,
causal_attention_mask: Optional[tf.Tensor] = None,
output_attentions: Optional[bool] = None,
encoder_hidden_states: Optional[tf.Tensor] = None,
attention_mask: tf.Tensor | None = None,
causal_attention_mask: tf.Tensor | None = None,
output_attentions: bool | None = None,
encoder_hidden_states: tf.Tensor | None = None,
training: bool = False,
) -> tuple[tf.Tensor]:
"""Input shape: Batch x Time x Channel"""
@ -1060,7 +1060,7 @@ class TFGroupViTTextEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
encoder_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@ -1121,7 +1121,7 @@ class TFGroupViTVisionEncoder(keras.layers.Layer):
output_attentions: bool,
return_dict: bool,
training: bool = False,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None
all_groupings = () if output_attentions else None
@ -1180,7 +1180,7 @@ class TFGroupViTTextTransformer(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
input_shape = shape_list(input_ids)
embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids)
@ -1292,7 +1292,7 @@ class TFGroupViTVisionTransformer(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[tuple, TFBaseModelOutputWithPooling]:
) -> tuple | TFBaseModelOutputWithPooling:
embedding_output = self.embeddings(pixel_values)
encoder_outputs = self.encoder(
@ -1356,11 +1356,11 @@ class TFGroupViTTextMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if input_ids is None:
raise ValueError("You have to specify input_ids")
@ -1407,11 +1407,11 @@ class TFGroupViTVisionMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
@ -1518,9 +1518,9 @@ class TFGroupViTMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
if input_ids is None:
@ -1552,9 +1552,9 @@ class TFGroupViTMainLayer(keras.layers.Layer):
def get_image_features(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
if pixel_values is None:
@ -1582,13 +1582,13 @@ class TFGroupViTMainLayer(keras.layers.Layer):
pixel_values: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
return_loss: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_segmentation: Optional[bool] = None,
return_dict: Optional[bool] = None,
return_loss: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
output_segmentation: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFGroupViTModelOutput, tuple[tf.Tensor]]:
) -> TFGroupViTModelOutput | tuple[tf.Tensor]:
if input_ids is None:
raise ValueError("You have to specify either input_ids")
if pixel_values is None:
@ -1867,11 +1867,11 @@ class TFGroupViTTextModel(TFGroupViTPreTrainedModel):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r"""
Returns:
@ -1926,11 +1926,11 @@ class TFGroupViTVisionModel(TFGroupViTPreTrainedModel):
def call(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r"""
Returns:
@ -1989,9 +1989,9 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
r"""
@ -2028,9 +2028,9 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel):
def get_image_features(
self,
pixel_values: TFModelInputType | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> tf.Tensor:
r"""
@ -2075,13 +2075,13 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel):
pixel_values: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
return_loss: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_segmentation: Optional[bool] = None,
return_dict: Optional[bool] = None,
return_loss: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
output_segmentation: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFGroupViTModelOutput, tuple[tf.Tensor]]:
) -> TFGroupViTModelOutput | tuple[tf.Tensor]:
r"""
Returns:

View File

@ -17,7 +17,7 @@
from __future__ import annotations
import warnings
from typing import Any, Optional, Union
from typing import Any
import numpy as np
import tensorflow as tf
@ -152,7 +152,7 @@ def _compute_mask_indices(
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -701,7 +701,7 @@ class TFHubertAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -896,7 +896,7 @@ class TFHubertEncoderLayer(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
) -> tuple[tf.Tensor]:
attn_residual = hidden_states
@ -956,7 +956,7 @@ class TFHubertEncoderLayerStableLayerNorm(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
) -> tuple[tf.Tensor]:
attn_residual = hidden_states
@ -1007,11 +1007,11 @@ class TFHubertEncoder(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = False,
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@ -1090,11 +1090,11 @@ class TFHubertEncoderStableLayerNorm(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = False,
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@ -1260,7 +1260,7 @@ class TFHubertMainLayer(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None,
output_attentions: tf.Tensor | None = None,
output_hidden_states: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs: Any,
):
@ -1445,11 +1445,11 @@ class TFHubertModel(TFHubertPreTrainedModel):
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
"""
Returns:
@ -1551,12 +1551,12 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_attentions: bool | None = None,
labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutput, tuple[tf.Tensor]]:
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFCausalLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,

View File

@ -22,7 +22,6 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, Union
import tensorflow as tf
@ -91,11 +90,11 @@ class TFIdeficsBaseModelOutputWithPast(ModelOutput):
image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver
"""
last_hidden_state: Optional[tf.Tensor] = None
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None
hidden_states: Optional[tuple[tf.Tensor]] = None
attentions: Optional[tuple[tf.Tensor]] = None
image_hidden_states: Optional[tuple[tf.Tensor]] = None
last_hidden_state: tf.Tensor | None = None
past_key_values: tuple[tuple[tf.Tensor]] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
image_hidden_states: tuple[tf.Tensor] | None = None
@dataclass
@ -132,12 +131,12 @@ class TFIdeficsCausalLMOutputWithPast(ModelOutput):
image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver
"""
loss: Optional[tf.Tensor] = None
logits: Optional[tf.Tensor] = None
past_key_values: Optional[list[tf.Tensor]] = None
hidden_states: Optional[tuple[tf.Tensor]] = None
attentions: Optional[tuple[tf.Tensor]] = None
image_hidden_states: Optional[tuple[tf.Tensor]] = None
loss: tf.Tensor | None = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
image_hidden_states: tuple[tf.Tensor] | None = None
def expand_inputs_for_generation(
@ -278,7 +277,7 @@ class TFIdeficsDecoupledEmbedding(tf.keras.layers.Embedding):
num_embeddings,
num_additional_embeddings,
embedding_dim,
partially_freeze: Optional[bool] = False,
partially_freeze: bool | None = False,
dtype=None,
**kwargs,
) -> None:
@ -658,13 +657,13 @@ class TFIdeficsAttention(tf.keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
key_value_states: Optional[tf.Tensor] = None,
attention_mask: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
past_key_value: Optional[tuple[tf.Tensor]] = None,
key_value_states: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None,
output_attentions: bool = False,
use_cache: bool = False,
) -> tuple[tf.Tensor, Optional[tf.Tensor], Optional[tuple[tf.Tensor]]]:
) -> tuple[tf.Tensor, tf.Tensor | None, tuple[tf.Tensor] | None]:
# if key_value_states are provided this layer is used as a cross-attention layer
is_cross_attention = self.is_cross_attention or key_value_states is not None
@ -791,13 +790,13 @@ class TFIdeficsDecoderLayer(tf.keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
attention_mask: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
past_key_value: Optional[tuple[tf.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None,
output_attentions: bool | None = False,
use_cache: bool | None = False,
training=False,
) -> tuple[tf.Tensor, Optional[tuple[tf.Tensor, tf.Tensor]]]:
) -> tuple[tf.Tensor, tuple[tf.Tensor, tf.Tensor] | None]:
"""
Args:
hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@ -979,14 +978,14 @@ class TFIdeficsGatedCrossAttentionLayer(tf.keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
attention_mask: Optional[tf.Tensor] = None,
image_hidden_states: Optional[tf.Tensor] = None,
image_attention_mask: Optional[tf.Tensor] = None,
cross_attention_gate: Optional[tf.Tensor] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
past_key_value: Optional[tuple[tf.Tensor]] = None,
) -> tuple[tf.Tensor, Optional[tuple[tf.Tensor, tf.Tensor]]]:
attention_mask: tf.Tensor | None = None,
image_hidden_states: tf.Tensor | None = None,
image_attention_mask: tf.Tensor | None = None,
cross_attention_gate: tf.Tensor | None = None,
output_attentions: bool | None = False,
use_cache: bool | None = False,
past_key_value: tuple[tf.Tensor] | None = None,
) -> tuple[tf.Tensor, tuple[tf.Tensor, tf.Tensor] | None]:
"""
Args:
hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@ -1255,21 +1254,21 @@ class TFIdeficsMainLayer(tf.keras.layers.Layer):
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
past_key_values: Optional[list[tf.Tensor]] = None,
inputs_embeds: Optional[tf.Tensor] = None,
pixel_values: Optional[tf.Tensor] = None,
image_encoder_embeddings: Optional[tf.Tensor] = None,
perceiver_embeddings: Optional[tf.Tensor] = None,
image_attention_mask: Optional[tf.Tensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = False,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[TFIdeficsBaseModelOutputWithPast, tuple[tf.Tensor]]:
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
image_encoder_embeddings: tf.Tensor | None = None,
perceiver_embeddings: tf.Tensor | None = None,
image_attention_mask: tf.Tensor | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
interpolate_pos_encoding: bool | None = False,
return_dict: bool | None = None,
training: bool | None = None,
) -> TFIdeficsBaseModelOutputWithPast | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -1554,21 +1553,21 @@ class TFIdeficsModel(TFIdeficsPreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
past_key_values: Optional[list[tf.Tensor]] = None,
inputs_embeds: Optional[tf.Tensor] = None,
pixel_values: Optional[tf.Tensor] = None,
image_encoder_embeddings: Optional[tf.Tensor] = None,
perceiver_embeddings: Optional[tf.Tensor] = None,
image_attention_mask: Optional[tf.Tensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = False,
return_dict: Optional[bool] = None,
training: Optional[bool] = None,
) -> Union[TFIdeficsBaseModelOutputWithPast, tuple[tf.Tensor]]:
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
image_encoder_embeddings: tf.Tensor | None = None,
perceiver_embeddings: tf.Tensor | None = None,
image_attention_mask: tf.Tensor | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
interpolate_pos_encoding: bool | None = False,
return_dict: bool | None = None,
training: bool | None = None,
) -> TFIdeficsBaseModelOutputWithPast | tuple[tf.Tensor]:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1659,22 +1658,22 @@ class TFIdeficsForVisionText2Text(TFPreTrainedModel, TFCausalLanguageModelingLos
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
past_key_values: Optional[list[tf.Tensor]] = None,
inputs_embeds: Optional[tf.Tensor] = None,
pixel_values: Optional[tf.Tensor] = None,
image_encoder_embeddings: Optional[tf.Tensor] = None,
perceiver_embeddings: Optional[tf.Tensor] = None,
image_attention_mask: Optional[tf.Tensor] = None,
labels: Optional[tf.Tensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = False,
return_dict: Optional[bool] = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
past_key_values: list[tf.Tensor] | None = None,
inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
image_encoder_embeddings: tf.Tensor | None = None,
perceiver_embeddings: tf.Tensor | None = None,
image_attention_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
interpolate_pos_encoding: bool | None = False,
return_dict: bool | None = None,
training=False,
) -> Union[TFIdeficsCausalLMOutputWithPast, tuple[tf.Tensor]]:
) -> TFIdeficsCausalLMOutputWithPast | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -127,11 +126,11 @@ class TFLayoutLMEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
bbox: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
bbox: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
training: bool = False,
) -> tf.Tensor:
"""
@ -584,12 +583,12 @@ class TFLayoutLMEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -825,11 +824,11 @@ class TFLayoutLMMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -1070,11 +1069,11 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r"""
Returns:
@ -1175,12 +1174,12 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1304,12 +1303,12 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1440,12 +1439,12 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1572,13 +1571,13 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
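
Several of the ModelOutput dataclasses in this commit get the same treatment for their field annotations (for example `logits: Optional[tf.Tensor] = None` becoming `logits: tf.Tensor | None = None`). A small sketch of that case, assuming TensorFlow is installed; `ExampleOutput` is a hypothetical stand-in, not one of the real output classes:

from __future__ import annotations

from dataclasses import dataclass

import tensorflow as tf


@dataclass
class ExampleOutput:
    # Field annotations use the `X | None` spelling; the dataclass machinery
    # only records the annotation strings, so behaviour is unchanged.
    logits: tf.Tensor | None = None
    hidden_states: tuple[tf.Tensor, ...] | None = None


out = ExampleOutput(logits=tf.constant([0.0]))
print(out.logits is not None, out.hidden_states)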

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import collections
import math
from typing import Optional, Union
import tensorflow as tf
@ -231,7 +230,7 @@ class TFLayoutLMv3TextEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: tf.Tensor | None = None,
bbox: Optional[tf.Tensor] = None,
bbox: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
@ -341,7 +340,7 @@ class TFLayoutLMv3SelfAttention(keras.layers.Layer):
x = tf.reshape(x, new_shape)
return tf.transpose(x, perm=[0, 2, 1, 3]) # batch_size, num_heads, seq_length, attention_head_size
def cogview_attention(self, attention_scores: tf.Tensor, alpha: Union[float, int] = 32):
def cogview_attention(self, attention_scores: tf.Tensor, alpha: float | int = 32):
"""
https://huggingface.co/papers/2105.13290 Section 2.4 Stabilization of training: Precision Bottleneck Relaxation
(PB-Relax). A replacement of the original keras.layers.Softmax(axis=-1)(attention_scores). Seems the new
@ -363,7 +362,7 @@ class TFLayoutLMv3SelfAttention(keras.layers.Layer):
rel_pos: tf.Tensor | None = None,
rel_2d_pos: tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], tuple[tf.Tensor, tf.Tensor]]:
) -> tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]:
key_layer = self.transpose_for_scores(self.key(hidden_states))
value_layer = self.transpose_for_scores(self.value(hidden_states))
query_layer = self.transpose_for_scores(self.query(hidden_states))
@ -468,7 +467,7 @@ class TFLayoutLMv3Attention(keras.layers.Layer):
rel_pos: tf.Tensor | None = None,
rel_2d_pos: tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], tuple[tf.Tensor, tf.Tensor]]:
) -> tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]:
self_outputs = self.self_attention(
hidden_states,
attention_mask,
@ -571,7 +570,7 @@ class TFLayoutLMv3Layer(keras.layers.Layer):
rel_pos: tf.Tensor | None = None,
rel_2d_pos: tf.Tensor | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], tuple[tf.Tensor, tf.Tensor]]:
) -> tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]:
self_attention_outputs = self.attention(
hidden_states,
attention_mask,
@ -711,12 +710,7 @@ class TFLayoutLMv3Encoder(keras.layers.Layer):
return_dict: bool = True,
position_ids: tf.Tensor | None = None,
training: bool = False,
) -> Union[
TFBaseModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
]:
) -> TFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@ -926,7 +920,7 @@ class TFLayoutLMv3MainLayer(keras.layers.Layer):
return extended_attention_mask
def get_head_mask(self, head_mask: tf.Tensor | None) -> Union[tf.Tensor, list[tf.Tensor | None]]:
def get_head_mask(self, head_mask: tf.Tensor | None) -> tf.Tensor | list[tf.Tensor | None]:
if head_mask is None:
return [None] * self.config.num_hidden_layers
@ -962,16 +956,11 @@ class TFLayoutLMv3MainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[
TFBaseModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
]:
) -> TFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
# This method can be called with a variety of modalities:
# 1. text + layout
# 2. text + layout + image
@ -1274,16 +1263,11 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[
TFBaseModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
]:
) -> TFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
r"""
Returns:
@ -1413,19 +1397,19 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
bbox: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[
TFSequenceClassifierOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor],
]:
training: bool | None = False,
) -> (
TFSequenceClassifierOutput
| tuple[tf.Tensor]
| tuple[tf.Tensor, tf.Tensor]
| tuple[tf.Tensor, tf.Tensor, tf.Tensor]
| tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
):
"""
Returns:
@ -1539,18 +1523,18 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
pixel_values: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[
TFTokenClassifierOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor],
]:
training: bool | None = False,
) -> (
TFTokenClassifierOutput
| tuple[tf.Tensor]
| tuple[tf.Tensor, tf.Tensor]
| tuple[tf.Tensor, tf.Tensor, tf.Tensor]
| tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
):
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1668,19 +1652,19 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
inputs_embeds: tf.Tensor | None = None,
start_positions: tf.Tensor | None = None,
end_positions: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
bbox: tf.Tensor | None = None,
pixel_values: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[
TFQuestionAnsweringModelOutput,
tuple[tf.Tensor],
tuple[tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor],
tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor],
]:
) -> (
TFQuestionAnsweringModelOutput
| tuple[tf.Tensor]
| tuple[tf.Tensor, tf.Tensor]
| tuple[tf.Tensor, tf.Tensor, tf.Tensor]
| tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
):
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
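
When a union no longer fits on one line, the new spelling wraps the whole return annotation in parentheses instead of a multi-line `Union[...]`, as in the LayoutLMv3 heads above. A short sketch of that layout, assuming TensorFlow is installed; `call_sketch` is hypothetical:

from __future__ import annotations

import tensorflow as tf


def call_sketch(
    training: bool = False,
) -> (
    dict
    | tuple[tf.Tensor]
    | tuple[tf.Tensor, tf.Tensor]
    | tuple[tf.Tensor, tf.Tensor, tf.Tensor]
):
    # The parentheses are ordinary expression grouping, so the annotation can
    # span several lines without a backslash or typing.Union.
    return (tf.constant(0.0),)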

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import random
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -100,7 +99,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -1470,7 +1469,7 @@ class TFLEDEncoderBaseModelOutput(ModelOutput):
in the sequence.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -1533,7 +1532,7 @@ class TFLEDSeq2SeqModelOutput(ModelOutput):
in the sequence.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor, ...] | None = None
decoder_attentions: tuple[tf.Tensor, ...] | None = None
@ -1600,7 +1599,7 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
decoder_hidden_states: tuple[tf.Tensor, ...] | None = None
decoder_attentions: tuple[tf.Tensor, ...] | None = None
@ -1731,7 +1730,7 @@ class TFLEDEncoder(keras.layers.Layer):
config: LEDConfig
"""
def __init__(self, config: LEDConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: LEDConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -2001,7 +2000,7 @@ class TFLEDDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: LEDConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: LEDConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -2253,7 +2252,7 @@ class TFLEDMainLayer(keras.layers.Layer):
decoder_attention_mask=None,
head_mask=None,
decoder_head_mask=None,
encoder_outputs: Optional[Union[tuple, TFLEDEncoderBaseModelOutput]] = None,
encoder_outputs: tuple | TFLEDEncoderBaseModelOutput | None = None,
global_attention_mask=None,
past_key_values=None,
inputs_embeds=None,
@ -2509,7 +2508,7 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: TFLEDEncoderBaseModelOutput | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: tuple[tuple[Union[np.ndarray, tf.Tensor]]] | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: bool | None = None,

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -93,7 +92,7 @@ class TFLongformerBaseModelOutput(ModelOutput):
in the sequence.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -140,8 +139,8 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput):
in the sequence.
"""
last_hidden_state: Optional[tf.Tensor] = None
pooler_output: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -187,7 +186,7 @@ class TFLongformerMaskedLMOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -235,8 +234,8 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
start_logits: Optional[tf.Tensor] = None
end_logits: Optional[tf.Tensor] = None
start_logits: tf.Tensor | None = None
end_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -282,7 +281,7 @@ class TFLongformerSequenceClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -330,7 +329,7 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -376,7 +375,7 @@ class TFLongformerTokenClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
global_attentions: tuple[tf.Tensor, ...] | None = None
@ -2138,11 +2137,11 @@ class TFLongformerModel(TFLongformerPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFLongformerBaseModelOutputWithPooling, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFLongformerBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.longformer(
input_ids=input_ids,
attention_mask=attention_mask,
@ -2208,12 +2207,12 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFLongformerMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFLongformerMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -2304,13 +2303,13 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFLongformerQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFLongformerQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -2477,12 +2476,12 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque
position_ids: np.ndarray | tf.Tensor | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFLongformerSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFLongformerSequenceClassifierOutput | tuple[tf.Tensor]:
if input_ids is not None and not isinstance(input_ids, tf.Tensor):
input_ids = tf.convert_to_tensor(input_ids, dtype=tf.int64)
elif input_ids is not None:
@ -2603,12 +2602,12 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic
position_ids: np.ndarray | tf.Tensor | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFLongformerMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFLongformerMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -2721,12 +2720,12 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenCla
position_ids: np.ndarray | tf.Tensor | None = None,
global_attention_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
labels: Optional[Union[np.array, tf.Tensor]] = None,
training: Optional[bool] = False,
) -> Union[TFLongformerTokenClassifierOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.array | tf.Tensor | None = None,
training: bool | None = False,
) -> TFLongformerTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

View File

@ -20,7 +20,6 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -1109,11 +1108,11 @@ class TFLxmertModel(TFLxmertPreTrainedModel):
visual_attention_mask: np.ndarray | tf.Tensor | None = None,
token_type_ids: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFLxmertModelOutput]:
) -> tuple | TFLxmertModelOutput:
outputs = self.lxmert(
input_ids,
visual_feats,

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import random
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -103,7 +102,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -216,7 +215,7 @@ class TFMarianAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -367,7 +366,7 @@ class TFMarianEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: np.ndarray | tf.Tensor | None,
layer_head_mask: tf.Tensor | None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tf.Tensor:
"""
Args:
@ -461,8 +460,8 @@ class TFMarianDecoderLayer(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
training: Optional[bool] = False,
past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -723,7 +722,7 @@ class TFMarianEncoder(keras.layers.Layer):
config: MarianConfig
"""
def __init__(self, config: MarianConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: MarianConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -753,9 +752,9 @@ class TFMarianEncoder(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
):
"""
@ -890,7 +889,7 @@ class TFMarianDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: MarianConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: MarianConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -924,10 +923,10 @@ class TFMarianDecoder(keras.layers.Layer):
head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
):
r"""
@ -1154,14 +1153,14 @@ class TFMarianMainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
):

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import random
from typing import Optional, Union
import tensorflow as tf
@ -102,7 +101,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -129,7 +128,7 @@ class TFMBartLearnedPositionalEmbedding(keras.layers.Embedding):
def call(
self,
input_shape: Optional[tf.TensorShape] = None,
input_shape: tf.TensorShape | None = None,
past_key_values_length: int = 0,
position_ids: tf.Tensor | None = None,
):
@ -185,7 +184,7 @@ class TFMBartAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -335,7 +334,7 @@ class TFMBartEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
layer_head_mask: tf.Tensor,
training: Optional[bool] = False,
training: bool | None = False,
):
"""
Args:
@ -429,7 +428,7 @@ class TFMBartDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -713,7 +712,7 @@ class TFMBartEncoder(keras.layers.Layer):
config: MBartConfig
"""
def __init__(self, config: MBartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: MBartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -746,11 +745,11 @@ class TFMBartEncoder(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
"""
Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -892,7 +891,7 @@ class TFMBartDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: MBartConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: MBartConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -928,14 +927,12 @@ class TFMBartDecoder(keras.layers.Layer):
head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[
TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]
]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
r"""
Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -1169,17 +1166,17 @@ class TFMBartMainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFSeq2SeqModelOutput, tf.Tensor]:
) -> TFSeq2SeqModelOutput | tf.Tensor:
if decoder_input_ids is None and decoder_inputs_embeds is None:
use_cache = False
@ -1293,17 +1290,17 @@ class TFMBartModel(TFMBartPreTrainedModel):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFSeq2SeqModelOutput, tuple[tf.Tensor]]:
) -> TFSeq2SeqModelOutput | tuple[tf.Tensor]:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1429,17 +1426,17 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None,
encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -1063,8 +1062,8 @@ class TFMobileBertForPreTrainingOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
prediction_logits: Optional[tf.Tensor] = None
seq_relationship_logits: Optional[tf.Tensor] = None
prediction_logits: tf.Tensor | None = None
seq_relationship_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -1191,11 +1190,11 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPooling]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPooling:
outputs = self.mobilebert(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1252,13 +1251,13 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel, TFMobileBertPreTra
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
next_sentence_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFMobileBertForPreTrainingOutput]:
training: bool | None = False,
) -> tuple | TFMobileBertForPreTrainingOutput:
r"""
Return:
@ -1369,12 +1368,12 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFMaskedLMOutput]:
training: bool | None = False,
) -> tuple | TFMaskedLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1471,12 +1470,12 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextS
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
next_sentence_label: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFNextSentencePredictorOutput]:
training: bool | None = False,
) -> tuple | TFNextSentencePredictorOutput:
r"""
Return:
@ -1587,12 +1586,12 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSequenceClassifierOutput]:
training: bool | None = False,
) -> tuple | TFSequenceClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1687,13 +1686,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFQuestionAnsweringModelOutput]:
training: bool | None = False,
) -> tuple | TFQuestionAnsweringModelOutput:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
@ -1796,12 +1795,12 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFMultipleChoiceModelOutput]:
training: bool | None = False,
) -> tuple | TFMultipleChoiceModelOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1914,12 +1913,12 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFTokenClassifierOutput]:
training: bool | None = False,
) -> tuple | TFTokenClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

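For tooling that introspects these signatures at runtime, the two spellings behave the same on Python 3.10+: `typing.get_args` reports identical members for `Optional[X]` and `X | None`. A quick check of that equivalence (assumes Python 3.10 or newer; not part of the diff):

import typing

old_style = typing.Optional[bool]  # spelling used by the removed lines
new_style = bool | None            # spelling used by the added lines (a types.UnionType on 3.10+)

# Both unions expose the same argument tuple, so downstream introspection
# (signature checks, doc generation, etc.) sees no difference.
assert typing.get_args(old_style) == (bool, type(None))
assert typing.get_args(new_style) == (bool, type(None))
print(typing.get_args(old_style) == typing.get_args(new_style))  # True
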
View File

@ -18,8 +18,6 @@
from __future__ import annotations
from typing import Optional, Union
import tensorflow as tf
from ...activations_tf import get_tf_activation
@ -61,7 +59,7 @@ _IMAGE_CLASS_CHECKPOINT = "apple/mobilevit-small"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"
def make_divisible(value: int, divisor: int = 8, min_value: Optional[int] = None) -> int:
def make_divisible(value: int, divisor: int = 8, min_value: int | None = None) -> int:
"""
Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the
original TensorFlow repo. It can be seen here:
@ -88,7 +86,7 @@ class TFMobileViTConvLayer(keras.layers.Layer):
bias: bool = False,
dilation: int = 1,
use_normalization: bool = True,
use_activation: Union[bool, str] = True,
use_activation: bool | str = True,
**kwargs,
) -> None:
super().__init__(**kwargs)
@ -764,7 +762,7 @@ class TFMobileViTEncoder(keras.layers.Layer):
output_hidden_states: bool = False,
return_dict: bool = True,
training: bool = False,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None
for i, layer_module in enumerate(self.layers):
@ -830,10 +828,10 @@ class TFMobileViTMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFBaseModelOutputWithPooling]:
) -> tuple[tf.Tensor] | TFBaseModelOutputWithPooling:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
@ -998,10 +996,10 @@ class TFMobileViTModel(TFMobileViTPreTrainedModel):
def call(
self,
pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFBaseModelOutputWithPooling]:
) -> tuple[tf.Tensor] | TFBaseModelOutputWithPooling:
output = self.mobilevit(pixel_values, output_hidden_states, return_dict, training=training)
return output
@ -1046,11 +1044,11 @@ class TFMobileViTForImageClassification(TFMobileViTPreTrainedModel, TFSequenceCl
def call(
self,
pixel_values: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
output_hidden_states: bool | None = None,
labels: tf.Tensor | None = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFImageClassifierOutputWithNoAttention]:
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFImageClassifierOutputWithNoAttention:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
@ -1287,10 +1285,10 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel):
self,
pixel_values: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFSemanticSegmenterOutputWithNoAttention]:
) -> tuple | TFSemanticSegmenterOutputWithNoAttention:
r"""
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -765,15 +764,15 @@ class TFMPNetModel(TFMPNetPreTrainedModel):
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None,
attention_mask: np.array | tf.Tensor | None = None,
position_ids: np.array | tf.Tensor | None = None,
head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
outputs = self.mpnet(
input_ids=input_ids,
attention_mask=attention_mask,
@ -887,12 +886,12 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: bool = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1002,16 +1001,16 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None,
attention_mask: np.array | tf.Tensor | None = None,
position_ids: np.array | tf.Tensor | None = None,
head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1090,12 +1089,12 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: bool = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1191,12 +1190,12 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: tf.Tensor | None = None,
training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1272,18 +1271,18 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos
def call(
self,
input_ids: TFModelInputType | None = None,
attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
position_ids: Optional[Union[np.array, tf.Tensor]] = None,
head_mask: Optional[Union[np.array, tf.Tensor]] = None,
attention_mask: np.array | tf.Tensor | None = None,
position_ids: np.array | tf.Tensor | None = None,
head_mask: np.array | tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: tf.Tensor | None = None,
end_positions: tf.Tensor | None = None,
training: bool = False,
**kwargs,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -18,7 +18,6 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -293,11 +292,11 @@ class TFOpenAIGPTMainLayer(keras.layers.Layer):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutput:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -429,8 +428,8 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput):
heads.
"""
logits: Optional[tf.Tensor] = None
mc_logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mc_logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -557,11 +556,11 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutput:
outputs = self.transformer(
input_ids=input_ids,
attention_mask=attention_mask,
@ -620,12 +619,12 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFCausalLMOutput]:
training: bool | None = False,
) -> tuple | TFCausalLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -708,11 +707,11 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
mc_token_ids: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFOpenAIGPTDoubleHeadsModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFOpenAIGPTDoubleHeadsModelOutput:
r"""
mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
@ -853,12 +852,12 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSequenceClassifierOutput]:
training: bool | None = False,
) -> tuple | TFSequenceClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,

View File

@ -16,8 +16,6 @@
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -78,7 +76,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -157,7 +155,7 @@ class TFOPTAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -312,10 +310,10 @@ class TFOPTDecoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: np.ndarray | tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
past_key_value: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
training: Optional[bool] = False,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
past_key_value: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
training: bool | None = False,
output_attentions: bool | None = False,
use_cache: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -578,13 +576,13 @@ class TFOPTDecoder(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
r"""
Args:
input_ids (`tf.Tensor` of shape `(batch_size, sequence_length)`):
@ -780,15 +778,15 @@ class TFOPTMainLayer(keras.layers.Layer):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -860,15 +858,15 @@ class TFOPTModel(TFOPTPreTrainedModel):
input_ids: TFModelInputType | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFBaseModelOutputWithPast, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPast | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -963,19 +961,19 @@ class TFOPTForCausalLM(TFOPTPreTrainedModel, TFCausalLanguageModelingLoss):
def call(
self,
input_ids: TFModelInputType | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
attention_mask: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[TFCausalLMOutputWithPast, tuple[tf.Tensor]]:
) -> TFCausalLMOutputWithPast | tuple[tf.Tensor]:
r"""
Args:
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import random
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -104,7 +103,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -218,7 +217,7 @@ class TFPegasusAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -369,7 +368,7 @@ class TFPegasusEncoderLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
attention_mask: tf.Tensor,
layer_head_mask: tf.Tensor,
training: Optional[bool] = False,
training: bool | None = False,
):
"""
Args:
@ -464,7 +463,7 @@ class TFPegasusDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -726,7 +725,7 @@ class TFPegasusEncoder(keras.layers.Layer):
config: PegasusConfig
"""
def __init__(self, config: PegasusConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: PegasusConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = keras.layers.Dropout(config.dropout)
@ -757,10 +756,10 @@ class TFPegasusEncoder(keras.layers.Layer):
inputs_embeds: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
):
"""
Args:
@ -899,7 +898,7 @@ class TFPegasusDecoder(keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: PegasusConfig, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: PegasusConfig, embed_tokens: keras.layers.Embedding | None = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
@ -933,12 +932,12 @@ class TFPegasusDecoder(keras.layers.Layer):
encoder_attention_mask: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
):
r"""
Args:
@ -1169,15 +1168,15 @@ class TFPegasusMainLayer(keras.layers.Layer):
head_mask: tf.Tensor | None = None,
decoder_head_mask: tf.Tensor | None = None,
cross_attn_head_mask: tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
past_key_values: Optional[tuple[tuple[tf.Tensor]]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[tf.Tensor]] | None = None,
inputs_embeds: tf.Tensor | None = None,
decoder_inputs_embeds: tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
):
if decoder_input_ids is None and decoder_inputs_embeds is None:
@ -1290,17 +1289,17 @@ class TFPegasusModel(TFPegasusPreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
) -> Union[TFSeq2SeqModelOutput, tuple[tf.Tensor]]:
) -> TFSeq2SeqModelOutput | tuple[tf.Tensor]:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -1426,17 +1425,17 @@ class TFPegasusForConditionalGeneration(TFPegasusPreTrainedModel, TFCausalLangua
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[TFBaseModelOutput] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]:
) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
"""
labels (`tf.tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import copy
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -115,7 +114,7 @@ class TFRetrievAugLMMarginOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
doc_scores: tf.Tensor | None = None
retrieved_doc_embeds: tf.Tensor | None = None
@@ -198,7 +197,7 @@ class TFRetrievAugLMOutput(ModelOutput):
average in the self-attention heads.
"""
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
past_key_values: list[tf.Tensor] | None = None
doc_scores: tf.Tensor | None = None
retrieved_doc_embeds: tf.Tensor | None = None
@@ -232,8 +231,8 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
@classmethod
def from_pretrained_question_encoder_generator(
cls,
question_encoder_pretrained_model_name_or_path: Optional[str] = None,
generator_pretrained_model_name_or_path: Optional[str] = None,
question_encoder_pretrained_model_name_or_path: str | None = None,
generator_pretrained_model_name_or_path: str | None = None,
retriever: RagRetriever = None,
*model_args,
**kwargs,
@@ -499,11 +498,11 @@ class TFRagModel(TFRagPreTrainedModel):
def __init__(
self,
config: Optional[PretrainedConfig] = None,
question_encoder: Optional[TFPreTrainedModel] = None,
generator: Optional[TFPreTrainedModel] = None,
retriever: Optional[RagRetriever] = None,
load_weight_prefix: Optional[str] = None,
config: PretrainedConfig | None = None,
question_encoder: TFPreTrainedModel | None = None,
generator: TFPreTrainedModel | None = None,
retriever: RagRetriever | None = None,
load_weight_prefix: str | None = None,
**kwargs,
):
assert config is not None or (question_encoder is not None and generator is not None), (
@@ -554,7 +553,7 @@ class TFRagModel(TFRagPreTrainedModel):
encoder_outputs: np.ndarray | tf.Tensor | None = None,
decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: tuple[tuple[Union[np.ndarray, tf.Tensor]]] | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None,
@@ -741,10 +740,10 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
def __init__(
self,
config: Optional[PretrainedConfig] = None,
question_encoder: Optional[TFPreTrainedModel] = None,
generator: Optional[TFPreTrainedModel] = None,
retriever: Optional[RagRetriever] = None,
config: PretrainedConfig | None = None,
question_encoder: TFPreTrainedModel | None = None,
generator: TFPreTrainedModel | None = None,
retriever: RagRetriever | None = None,
**kwargs,
):
assert config is not None or (question_encoder is not None and generator is not None), (
@@ -859,7 +858,7 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: tuple[tuple[Union[np.ndarray, tf.Tensor]]] | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None,
@@ -1321,10 +1320,10 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
def __init__(
self,
config: Optional[PretrainedConfig] = None,
question_encoder: Optional[TFPreTrainedModel] = None,
generator: Optional[TFPreTrainedModel] = None,
retriever: Optional[RagRetriever] = None,
config: PretrainedConfig | None = None,
question_encoder: TFPreTrainedModel | None = None,
generator: TFPreTrainedModel | None = None,
retriever: RagRetriever | None = None,
**kwargs,
):
assert config is not None or (question_encoder is not None and generator is not None), (
@@ -1373,22 +1372,22 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
doc_scores: np.ndarray | tf.Tensor | None = None,
context_input_ids: np.ndarray | tf.Tensor | None = None,
context_attention_mask: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
output_retrieved: Optional[bool] = None,
n_docs: Optional[int] = None,
exclude_bos_score: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
output_retrieved: bool | None = None,
n_docs: int | None = None,
exclude_bos_score: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
reduce_loss: Optional[bool] = None,
return_dict: Optional[bool] = None,
reduce_loss: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs, # needs kwargs for generation
) -> Union[tuple[tf.Tensor], TFRetrievAugLMMarginOutput]:
) -> tuple[tf.Tensor] | TFRetrievAugLMMarginOutput:
r"""
exclude_bos_score (`bool`, *optional*):
Only relevant if `labels` is passed. If `True`, the score of the BOS token is disregarded when computing
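
The RAG output classes above (`TFRetrievAugLMMarginOutput`, `TFRetrievAugLMOutput`) show the same change on dataclass-style fields. Since the module keeps `from __future__ import annotations`, the new `tf.Tensor | None` field annotations are never evaluated when the class is created, so the rewrite is purely notational at runtime. A rough sketch with a hypothetical output class (not part of the diff):

from __future__ import annotations

from dataclasses import dataclass, fields

import tensorflow as tf

@dataclass
class SketchOutput:
    # Field style after this commit; each annotation stays an unevaluated string.
    logits: tf.Tensor | None = None
    doc_scores: tf.Tensor | None = None

print([f.name for f in fields(SketchOutput)])    # ['logits', 'doc_scores']
print(SketchOutput.__annotations__["logits"])    # 'tf.Tensor | None'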

View File

@@ -17,7 +17,6 @@
from __future__ import annotations
import math
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@@ -106,10 +105,10 @@ class TFRemBertEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
past_key_values_length=0,
training: bool = False,
) -> tf.Tensor:
@@ -550,7 +549,7 @@ class TFRemBertEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
hidden_states = self.embedding_hidden_mapping_in(inputs=hidden_states)
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@@ -766,13 +765,13 @@ class TFRemBertMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@@ -1063,13 +1062,13 @@ class TFRemBertModel(TFRemBertPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1150,12 +1149,12 @@ class TFRemBertForMaskedLM(TFRemBertPreTrainedModel, TFMaskedLanguageModelingLos
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1246,14 +1245,14 @@ class TFRemBertForCausalLM(TFRemBertPreTrainedModel, TFCausalLanguageModelingLos
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1364,12 +1363,12 @@ class TFRemBertForSequenceClassification(TFRemBertPreTrainedModel, TFSequenceCla
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1449,12 +1448,12 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss)
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1559,12 +1558,12 @@ class TFRemBertForTokenClassification(TFRemBertPreTrainedModel, TFTokenClassific
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1643,13 +1642,13 @@ class TFRemBertForQuestionAnswering(TFRemBertPreTrainedModel, TFQuestionAnswerin
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@@ -19,7 +19,6 @@ from __future__ import annotations
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@@ -596,12 +595,12 @@ class TFRobertaEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@@ -709,13 +708,13 @@ class TFRobertaMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@@ -1005,13 +1004,13 @@ class TFRobertaModel(TFRobertaPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1156,12 +1155,12 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1259,14 +1258,14 @@ class TFRobertaForCausalLM(TFRobertaPreTrainedModel, TFCausalLanguageModelingLos
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1417,12 +1416,12 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1506,12 +1505,12 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss)
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1614,12 +1613,12 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1704,13 +1703,13 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@@ -19,7 +19,6 @@ from __future__ import annotations
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@@ -597,12 +596,12 @@ class TFRobertaPreLayerNormEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@@ -707,13 +706,13 @@ class TFRobertaPreLayerNormMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@@ -1007,13 +1006,13 @@ class TFRobertaPreLayerNormModel(TFRobertaPreLayerNormPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1165,12 +1164,12 @@ class TFRobertaPreLayerNormForMaskedLM(TFRobertaPreLayerNormPreTrainedModel, TFM
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1275,14 +1274,14 @@ class TFRobertaPreLayerNormForCausalLM(TFRobertaPreLayerNormPreTrainedModel, TFC
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -1437,12 +1436,12 @@ class TFRobertaPreLayerNormForSequenceClassification(
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1529,12 +1528,12 @@ class TFRobertaPreLayerNormForMultipleChoice(TFRobertaPreLayerNormPreTrainedMode
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1638,12 +1637,12 @@ class TFRobertaPreLayerNormForTokenClassification(TFRobertaPreLayerNormPreTraine
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1729,13 +1728,13 @@ class TFRobertaPreLayerNormForQuestionAnswering(TFRobertaPreLayerNormPreTrainedM
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@@ -17,7 +17,6 @@
from __future__ import annotations
import math
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@@ -156,9 +155,9 @@ class TFRoFormerEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
training: bool = False,
) -> tf.Tensor:
"""
@@ -524,7 +523,7 @@ class TFRoFormerEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@@ -715,11 +714,11 @@ class TFRoFormerMainLayer(keras.layers.Layer):
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@@ -934,11 +933,11 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.roformer(
input_ids=input_ids,
attention_mask=attention_mask,
@@ -993,12 +992,12 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@@ -1072,12 +1071,12 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@@ -1198,12 +1197,12 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1282,12 +1281,12 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1389,12 +1388,12 @@ class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1471,13 +1470,13 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer
token_type_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@@ -21,7 +21,6 @@ from __future__ import annotations
import collections
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@@ -71,7 +70,7 @@ class TFSamVisionEncoderOutput(ModelOutput):
"""
image_embeds: tf.Tensor | None = None
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@@ -105,8 +104,8 @@ class TFSamImageSegmentationOutput(ModelOutput):
heads.
"""
iou_scores: Optional[tf.Tensor] = None
pred_masks: Optional[tf.Tensor] = None
iou_scores: tf.Tensor | None = None
pred_masks: tf.Tensor | None = None
vision_hidden_states: tuple[tf.Tensor, ...] | None = None
vision_attentions: tuple[tf.Tensor, ...] | None = None
mask_decoder_attentions: tuple[tf.Tensor, ...] | None = None
@@ -431,10 +430,10 @@ class TFSamTwoWayTransformer(keras.layers.Layer):
point_embeddings: tf.Tensor,
image_embeddings: tf.Tensor,
image_positional_embeddings: tf.Tensor,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
) -> tuple | TFBaseModelOutput:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -613,7 +612,7 @@ class TFSamMaskDecoder(keras.layers.Layer):
sparse_prompt_embeddings: tf.Tensor,
dense_prompt_embeddings: tf.Tensor,
multimask_output: bool,
output_attentions: Optional[bool] = None,
output_attentions: bool | None = None,
) -> tuple[tf.Tensor, tf.Tensor]:
batch_size, num_channels, height, width = shape_list(image_embeddings)
point_batch_size = tf.math.maximum(1, tf.shape(sparse_prompt_embeddings)[1])
@@ -857,8 +856,8 @@ class TFSamPromptEncoder(keras.layers.Layer):
def call(
self,
batch_size: Optional[int],
input_points: Optional[tuple[tf.Tensor, tf.Tensor]],
batch_size: int | None,
input_points: tuple[tf.Tensor, tf.Tensor] | None,
input_labels: tf.Tensor | None,
input_boxes: tf.Tensor | None,
input_masks: tf.Tensor | None,
@@ -1119,8 +1118,8 @@ class TFSamVisionLayer(keras.layers.Layer):
def call(
self,
hidden_states: tf.Tensor,
output_attentions: Optional[bool] = False,
training: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool | None = False,
) -> tuple[tf.Tensor]:
residual = hidden_states
@@ -1268,11 +1267,11 @@ class TFSamVisionEncoder(keras.layers.Layer):
def call(
self,
pixel_values: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSamVisionEncoderOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFSamVisionEncoderOutput:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1506,9 +1505,9 @@ class TFSamModel(TFSamPreTrainedModel):
def get_image_embeddings(
self,
pixel_values,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
):
r"""
Returns the image embeddings by passing the pixel values through the vision encoder.

View File

@@ -17,7 +17,6 @@
from __future__ import annotations
import math
from typing import Optional, Union
import tensorflow as tf
@@ -169,7 +168,7 @@ class TFSegformerEfficientSelfAttention(keras.layers.Layer):
width: int,
output_attentions: bool = False,
training: bool = False,
) -> Union[tf.Tensor, tuple[tf.Tensor, tf.Tensor]]:
) -> tf.Tensor | tuple[tf.Tensor, tf.Tensor]:
batch_size = shape_list(hidden_states)[0]
num_channels = shape_list(hidden_states)[2]
@@ -272,7 +271,7 @@ class TFSegformerAttention(keras.layers.Layer):
def call(
self, hidden_states: tf.Tensor, height: int, width: int, output_attentions: bool = False
) -> Union[tf.Tensor, tuple[tf.Tensor, tf.Tensor]]:
) -> tf.Tensor | tuple[tf.Tensor, tf.Tensor]:
self_outputs = self.self(hidden_states, height, width, output_attentions)
attention_output = self.dense_output(self_outputs[0])
@@ -325,8 +324,8 @@ class TFSegformerMixFFN(keras.layers.Layer):
self,
config: SegformerConfig,
in_features: int,
hidden_features: Optional[int] = None,
out_features: Optional[int] = None,
hidden_features: int | None = None,
out_features: int | None = None,
**kwargs,
):
super().__init__(**kwargs)
@@ -499,11 +498,11 @@ class TFSegformerEncoder(keras.layers.Layer):
def call(
self,
pixel_values: tf.Tensor,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
output_attentions: bool | None = False,
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
training: bool = False,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@@ -580,11 +579,11 @@ class TFSegformerMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: tf.Tensor,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -714,11 +713,11 @@ class TFSegformerModel(TFSegformerPreTrainedModel):
def call(
self,
pixel_values: tf.Tensor,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFBaseModelOutput]:
) -> tuple | TFBaseModelOutput:
outputs = self.segformer(
pixel_values,
output_attentions=output_attentions,
@@ -767,10 +766,10 @@ class TFSegformerForImageClassification(TFSegformerPreTrainedModel, TFSequenceCl
self,
pixel_values: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, TFSequenceClassifierOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
) -> tuple | TFSequenceClassifierOutput:
outputs = self.segformer(
pixel_values,
output_attentions=output_attentions,
@@ -951,10 +950,10 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel):
self,
pixel_values: tf.Tensor,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, TFSemanticSegmenterOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
) -> tuple | TFSemanticSegmenterOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,

View File

@@ -17,7 +17,6 @@
from __future__ import annotations
import random
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@@ -102,7 +101,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@@ -173,7 +172,7 @@ class TFConv1dSubsampler(keras.layers.Layer):
class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer):
"""This module produces sinusoidal positional embeddings of any length."""
def __init__(self, num_positions: int, embedding_dim: int, padding_idx: Optional[int] = None, **kwargs):
def __init__(self, num_positions: int, embedding_dim: int, padding_idx: int | None = None, **kwargs):
super().__init__(**kwargs)
self.offset = 2
self.embedding_dim = embedding_dim
@@ -181,7 +180,7 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer):
self.embedding_weights = self._get_embedding(num_positions + self.offset, embedding_dim, padding_idx)
@staticmethod
def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None) -> tf.Tensor:
def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: int | None = None) -> tf.Tensor:
"""
Build sinusoidal embeddings. This matches the implementation in tensor2tensor, but differs slightly from the
description in Section 3.5 of "Attention Is All You Need".
@@ -214,7 +213,7 @@ class TFSpeech2TextSinusoidalPositionalEmbedding(keras.layers.Layer):
@staticmethod
def create_position_ids_from_input_ids(
input_ids: tf.Tensor, padding_idx: int, past_key_values_length: Optional[int] = 0
input_ids: tf.Tensor, padding_idx: int, past_key_values_length: int | None = 0
) -> tf.Tensor:
"""
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding
@@ -271,7 +270,7 @@ class TFSpeech2TextAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@@ -1346,15 +1345,15 @@ class TFSpeech2TextModel(TFSpeech2TextPreTrainedModel):
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
) -> Union[tuple, TFSeq2SeqModelOutput]:
) -> tuple | TFSeq2SeqModelOutput:
outputs = self.model(
input_features=input_features,
attention_mask=attention_mask,
@@ -1445,16 +1444,16 @@ class TFSpeech2TextForConditionalGeneration(TFSpeech2TextPreTrainedModel, TFCaus
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs,
) -> Union[tuple, TFSeq2SeqLMOutput]:
) -> tuple | TFSeq2SeqLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,

View File

@@ -22,7 +22,7 @@ import warnings
from collections.abc import Iterable
from dataclasses import dataclass
from functools import partial
from typing import Any, Callable, Optional, Union
from typing import Any, Callable
import tensorflow as tf
@@ -92,7 +92,7 @@ class TFSwinEncoderOutput(ModelOutput):
include the spatial dimensions.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None
@@ -127,7 +127,7 @@ class TFSwinModelOutput(ModelOutput):
include the spatial dimensions.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
pooler_output: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@@ -164,7 +164,7 @@ class TFSwinMaskedImageModelingOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
reconstruction: Optional[tf.Tensor] = None
reconstruction: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None
@@ -209,7 +209,7 @@ class TFSwinImageClassifierOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
reshaped_hidden_states: tuple[tf.Tensor, ...] | None = None
@@ -308,7 +308,7 @@ class TFSwinEmbeddings(keras.layers.Layer):
self.dropout.build(None)
def call(
self, pixel_values: tf.Tensor, bool_masked_pos: Optional[bool] = None, training: bool = False
self, pixel_values: tf.Tensor, bool_masked_pos: bool | None = None, training: bool = False
) -> tuple[tf.Tensor, tuple[int, int]]:
embeddings, output_dimensions = self.patch_embeddings(pixel_values, training=training)
embeddings = self.norm(embeddings, training=training)
@@ -413,7 +413,7 @@ class TFSwinPatchMerging(keras.layers.Layer):
"""
def __init__(
self, input_resolution: tuple[int, int], dim: int, norm_layer: Optional[Callable] = None, **kwargs
self, input_resolution: tuple[int, int], dim: int, norm_layer: Callable | None = None, **kwargs
) -> None:
super().__init__(**kwargs)
self.input_resolution = input_resolution
@@ -475,7 +475,7 @@ class TFSwinPatchMerging(keras.layers.Layer):
class TFSwinDropPath(keras.layers.Layer):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
def __init__(self, drop_prob: Optional[float] = None, scale_by_keep: bool = True, **kwargs) -> None:
def __init__(self, drop_prob: float | None = None, scale_by_keep: bool = True, **kwargs) -> None:
super().__init__(**kwargs)
self.drop_prob = drop_prob
self.scale_by_keep = scale_by_keep
@@ -908,7 +908,7 @@ class TFSwinStage(keras.layers.Layer):
depth: int,
num_heads: int,
drop_path: list[float],
downsample: Optional[Callable],
downsample: Callable | None,
**kwargs,
) -> None:
super().__init__(**kwargs)
@@ -945,7 +945,7 @@ class TFSwinStage(keras.layers.Layer):
hidden_states: tf.Tensor,
input_dimensions: tuple[int, int],
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
) -> tuple[tf.Tensor, ...]:
height, width = input_dimensions
@@ -1015,7 +1015,7 @@ class TFSwinEncoder(keras.layers.Layer):
output_hidden_states: bool = False,
return_dict: bool = True,
training: bool = False,
) -> Union[tuple[tf.Tensor, ...], TFSwinEncoderOutput]:
) -> tuple[tf.Tensor, ...] | TFSwinEncoderOutput:
all_input_dimensions = ()
all_hidden_states = () if output_hidden_states else None
all_reshaped_hidden_states = () if output_hidden_states else None
@@ -1157,9 +1157,9 @@ class AdaptiveAveragePooling1D(keras.layers.Layer):
def __init__(
self,
output_size: Union[int, Iterable[int]],
output_size: int | Iterable[int],
reduce_function: Callable = tf.reduce_mean,
data_format: Optional[str] = None,
data_format: str | None = None,
**kwargs,
) -> None:
self.data_format = normalize_data_format(data_format)
@@ -1225,7 +1225,7 @@ class TFSwinMainLayer(keras.layers.Layer):
for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads)
def get_head_mask(self, head_mask: Optional[Any]) -> list:
def get_head_mask(self, head_mask: Any | None) -> list:
if head_mask is not None:
raise NotImplementedError
return [None] * len(self.config.depths)
@@ -1236,11 +1236,11 @@ class TFSwinMainLayer(keras.layers.Layer):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFSwinModelOutput, tuple[tf.Tensor, ...]]:
) -> TFSwinModelOutput | tuple[tf.Tensor, ...]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -1332,11 +1332,11 @@ class TFSwinModel(TFSwinPreTrainedModel):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFSwinModelOutput, tuple[tf.Tensor, ...]]:
) -> TFSwinModelOutput | tuple[tf.Tensor, ...]:
r"""
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1449,11 +1449,11 @@ class TFSwinForMaskedImageModeling(TFSwinPreTrainedModel):
pixel_values: tf.Tensor | None = None,
bool_masked_pos: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple, TFSwinMaskedImageModelingOutput]:
) -> tuple | TFSwinMaskedImageModelingOutput:
r"""
bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`):
Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
@ -1583,11 +1583,11 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati
pixel_values: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
labels: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor, ...], TFSwinImageClassifierOutput]:
) -> tuple[tf.Tensor, ...] | TFSwinImageClassifierOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,

View File
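
The hunks above all make the same mechanical substitution: `Optional[X]` becomes `X | None` and `Union[A, B]` becomes `A | B`. As a hedged aside (not part of the commit), a minimal sketch of why the rewrite is behaviour-preserving: on Python 3.10+ the PEP 604 pipe builds the same union object that `typing` would, and in annotation position the new spelling is also safe on older interpreters whenever the module opts into postponed evaluation with `from __future__ import annotations`, which several hunk headers in this commit show.

# Minimal sketch, not part of the commit; the asserts need Python 3.10+.
from typing import Optional, Union

assert (int | None) == Optional[int]      # Optional[X] is now spelled X | None
assert (int | str) == Union[int, str]     # Union[A, B] is now spelled A | B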

@ -21,7 +21,6 @@ import copy
import itertools
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -1210,15 +1209,15 @@ class TFT5Model(TFT5PreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSeq2SeqModelOutput]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFSeq2SeqModelOutput:
r"""
Returns:
@ -1387,16 +1386,16 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFSeq2SeqLMOutput]:
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFSeq2SeqLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
@ -1620,11 +1619,11 @@ class TFT5EncoderModel(TFT5PreTrainedModel):
attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutput]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutput:
r"""
Returns:

View File
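
The T5 hunks above include the most deeply nested case: `past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]]` becomes `tuple[tuple[np.ndarray | tf.Tensor]] | None`. A minimal, self-contained sketch (stand-in element types instead of the real array types) showing that only the spelling of the inner union and the outer optional changes, while the nesting itself is untouched:

# Minimal sketch with stand-in element types; not part of the commit, needs Python 3.10+.
from typing import get_args, get_origin

NewStyle = tuple[tuple[int | str]] | None      # was Optional[tuple[tuple[Union[int, str]]]]

outer_args = get_args(NewStyle)                # (tuple[tuple[int | str]], NoneType)
assert type(None) in outer_args                # the trailing "| None" plays the role of the old Optional[...]
inner = next(a for a in outer_args if a is not type(None))
assert get_origin(inner) is tuple              # the tuple nesting is unchanged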

@ -19,7 +19,6 @@ from __future__ import annotations
import enum
import math
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -111,7 +110,7 @@ class TFTableQuestionAnsweringOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
logits_aggregation: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -170,10 +169,10 @@ class TFTapasEmbeddings(keras.layers.Layer):
def call(
self,
input_ids: Optional[tf.Tensor] = None,
position_ids: Optional[tf.Tensor] = None,
token_type_ids: Optional[tf.Tensor] = None,
inputs_embeds: Optional[tf.Tensor] = None,
input_ids: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
token_type_ids: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
training: bool = False,
) -> tf.Tensor:
"""
@ -627,12 +626,12 @@ class TFTapasEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -865,11 +864,11 @@ class TFTapasMainLayer(keras.layers.Layer):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
@ -1100,11 +1099,11 @@ class TFTapasModel(TFTapasPreTrainedModel):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
r"""
Returns:
@ -1182,12 +1181,12 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1404,12 +1403,12 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
float_answer: np.ndarray | tf.Tensor | None = None,
numeric_values: np.ndarray | tf.Tensor | None = None,
numeric_values_scale: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTableQuestionAnsweringOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTableQuestionAnsweringOutput | tuple[tf.Tensor]:
r"""
table_mask (`tf.Tensor` of shape `(batch_size, seq_length)`, *optional*):
Mask for the table. Indicates which tokens belong to the table (1). Question tokens, table headers and
@ -1731,12 +1730,12 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import re
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -187,9 +186,9 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
def __init__(
self,
config: Optional[PretrainedConfig] = None,
encoder: Optional[TFPreTrainedModel] = None,
decoder: Optional[TFPreTrainedModel] = None,
config: PretrainedConfig | None = None,
encoder: TFPreTrainedModel | None = None,
decoder: TFPreTrainedModel | None = None,
):
if config is None and (encoder is None or decoder is None):
raise ValueError("Either a configuration or an encoder and a decoder has to be provided.")
@ -309,8 +308,8 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
@classmethod
def from_encoder_decoder_pretrained(
cls,
encoder_pretrained_model_name_or_path: Optional[str] = None,
decoder_pretrained_model_name_or_path: Optional[str] = None,
encoder_pretrained_model_name_or_path: str | None = None,
decoder_pretrained_model_name_or_path: str | None = None,
*model_args,
**kwargs,
) -> TFPreTrainedModel:
@ -462,17 +461,17 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos
pixel_values: np.ndarray | tf.Tensor | None = None,
decoder_input_ids: np.ndarray | tf.Tensor | None = None,
decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[Union[tuple, TFBaseModelOutput]] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
encoder_outputs: tuple | TFBaseModelOutput | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs,
) -> Union[TFSeq2SeqLMOutput, tuple[tf.Tensor]]:
) -> TFSeq2SeqLMOutput | tuple[tf.Tensor]:
r"""
Returns:

View File

@ -17,7 +17,6 @@
from __future__ import annotations
import re
from typing import Optional, Union
import tensorflow as tf
@ -178,9 +177,9 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
def __init__(
self,
config: Optional[VisionTextDualEncoderConfig] = None,
vision_model: Optional[TFPreTrainedModel] = None,
text_model: Optional[TFPreTrainedModel] = None,
config: VisionTextDualEncoderConfig | None = None,
vision_model: TFPreTrainedModel | None = None,
text_model: TFPreTrainedModel | None = None,
):
if config is None and (vision_model is None or text_model is None):
raise ValueError("Either a configuration or an vision and a text model has to be provided")
@ -351,13 +350,13 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
pixel_values: tf.Tensor | None = None,
attention_mask: tf.Tensor | None = None,
position_ids: tf.Tensor | None = None,
return_loss: Optional[bool] = None,
return_loss: bool | None = None,
token_type_ids: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFCLIPOutput]:
) -> tuple[tf.Tensor] | TFCLIPOutput:
r"""
Returns:
@ -465,8 +464,8 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
@classmethod
def from_vision_text_pretrained(
cls,
vision_model_name_or_path: Optional[str] = None,
text_model_name_or_path: Optional[str] = None,
vision_model_name_or_path: str | None = None,
text_model_name_or_path: str | None = None,
*model_args,
**kwargs,
) -> TFPreTrainedModel:

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import collections.abc
import math
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -514,7 +513,7 @@ class TFViTEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@ -583,12 +582,12 @@ class TFViTMainLayer(keras.layers.Layer):
self,
pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
interpolate_pos_encoding: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
@ -756,12 +755,12 @@ class TFViTModel(TFViTPreTrainedModel):
self,
pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
interpolate_pos_encoding: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPooling, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPooling | tuple[tf.Tensor]:
outputs = self.vit(
pixel_values=pixel_values,
head_mask=head_mask,
@ -854,13 +853,13 @@ class TFViTForImageClassification(TFViTPreTrainedModel, TFSequenceClassification
self,
pixel_values: TFModelInputType | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
interpolate_pos_encoding: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
interpolate_pos_encoding: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for computing the image classification/regression loss. Indices should be in `[0, ...,

View File

@ -20,7 +20,6 @@ import collections.abc
import math
from copy import deepcopy
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -74,9 +73,9 @@ class TFViTMAEModelOutput(ModelOutput):
the self-attention heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
mask: Optional[tf.Tensor] = None
ids_restore: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
mask: tf.Tensor | None = None
ids_restore: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -99,7 +98,7 @@ class TFViTMAEDecoderOutput(ModelOutput):
the self-attention heads.
"""
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -129,9 +128,9 @@ class TFViTMAEForPreTrainingOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
mask: Optional[tf.Tensor] = None
ids_restore: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mask: tf.Tensor | None = None
ids_restore: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -314,7 +313,7 @@ class TFViTMAEEmbeddings(keras.layers.Layer):
return sequence_unmasked, mask, ids_restore
def call(
self, pixel_values: tf.Tensor, noise: Optional[tf.Tensor] = None, interpolate_pos_encoding: bool = False
self, pixel_values: tf.Tensor, noise: tf.Tensor | None = None, interpolate_pos_encoding: bool = False
) -> tf.Tensor:
batch_size, num_channels, height, width = shape_list(pixel_values)
embeddings = self.patch_embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)
@ -708,7 +707,7 @@ class TFViTMAEEncoder(keras.layers.Layer):
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
@ -775,14 +774,14 @@ class TFViTMAEMainLayer(keras.layers.Layer):
def call(
self,
pixel_values: TFModelInputType | None = None,
noise: Optional[tf.Tensor] = None,
noise: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
interpolate_pos_encoding: bool = False,
) -> Union[TFViTMAEModelOutput, tuple[tf.Tensor]]:
) -> TFViTMAEModelOutput | tuple[tf.Tensor]:
embedding_output, mask, ids_restore = self.embeddings(
pixel_values=pixel_values,
training=training,
@ -943,14 +942,14 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel):
def call(
self,
pixel_values: TFModelInputType | None = None,
noise: Optional[tf.Tensor] = None,
noise: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
interpolate_pos_encoding: bool = False,
) -> Union[TFViTMAEModelOutput, tuple[tf.Tensor]]:
) -> TFViTMAEModelOutput | tuple[tf.Tensor]:
r"""
Returns:
@ -1219,7 +1218,7 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel):
)
return patchified_pixel_values
def unpatchify(self, patchified_pixel_values, original_image_size: Optional[tuple[int, int]] = None):
def unpatchify(self, patchified_pixel_values, original_image_size: tuple[int, int] | None = None):
"""
Args:
patchified_pixel_values (`tf.Tensor` of shape `(batch_size, num_patches, patch_size**2 * num_channels)`:
@ -1294,14 +1293,14 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel):
def call(
self,
pixel_values: TFModelInputType | None = None,
noise: Optional[tf.Tensor] = None,
noise: tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
interpolate_pos_encoding: bool = False,
) -> Union[TFViTMAEForPreTrainingOutput, tuple[tf.Tensor]]:
) -> TFViTMAEForPreTrainingOutput | tuple[tf.Tensor]:
r"""
Returns:

View File
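
The ViT MAE hunks above also rewrite dataclass field annotations such as `last_hidden_state: tf.Tensor | None = None`. A hedged sketch (hypothetical minimal dataclass, not the library's ModelOutput) of why this is safe even on interpreters that predate the pipe syntax: with postponed evaluation the field annotation stays a string, and only the default value matters when the class is created.

# Minimal sketch; hypothetical Output class, not the library's ModelOutput.
from __future__ import annotations

from dataclasses import dataclass, fields

@dataclass
class Output:
    logits: float | None = None               # annotation is stored as the string "float | None"

assert isinstance(Output.__annotations__["logits"], str)   # never evaluated at class-creation time
assert fields(Output)[0].default is None
assert Output().logits is None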

@ -18,7 +18,7 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Any, Optional, Union
from typing import Any
import numpy as np
import tensorflow as tf
@ -78,8 +78,8 @@ class TFWav2Vec2BaseModelOutput(ModelOutput):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
extract_features: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
extract_features: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor] | None = None
attentions: tuple[tf.Tensor] | None = None
@ -184,7 +184,7 @@ def _compute_mask_indices(
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -729,7 +729,7 @@ class TFWav2Vec2Attention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -922,7 +922,7 @@ class TFWav2Vec2EncoderLayer(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
) -> tuple[tf.Tensor]:
attn_residual = hidden_states
@ -981,7 +981,7 @@ class TFWav2Vec2EncoderLayerStableLayerNorm(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
) -> tuple[tf.Tensor]:
attn_residual = hidden_states
@ -1031,11 +1031,11 @@ class TFWav2Vec2Encoder(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = False,
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@ -1113,11 +1113,11 @@ class TFWav2Vec2EncoderStableLayerNorm(keras.layers.Layer):
self,
hidden_states: tf.Tensor,
attention_mask: tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_hidden_states: Optional[bool] = False,
return_dict: Optional[bool] = True,
training: Optional[bool] = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
output_attentions: bool | None = False,
output_hidden_states: bool | None = False,
return_dict: bool | None = True,
training: bool | None = False,
) -> TFBaseModelOutput | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
@ -1281,9 +1281,9 @@ class TFWav2Vec2MainLayer(keras.layers.Layer):
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
**kwargs: Any,
):
@ -1516,11 +1516,11 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel):
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
"""
Returns:
@ -1622,12 +1622,12 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel):
position_ids: tf.Tensor | None = None,
head_mask: tf.Tensor | None = None,
inputs_embeds: tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_attentions: bool | None = None,
labels: tf.Tensor | None = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutput, tuple[tf.Tensor]]:
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> TFCausalLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,

View File

@ -18,7 +18,6 @@ from __future__ import annotations
import math
import random
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -111,7 +110,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -129,7 +128,7 @@ class TFWhisperPositionalEmbedding(keras.layers.Layer):
self,
num_positions: int,
embedding_dim: int,
padding_idx: Optional[int] = None,
padding_idx: int | None = None,
embedding_initializer=None,
**kwargs,
):
@ -197,7 +196,7 @@ class TFWhisperAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -1262,15 +1261,15 @@ class TFWhisperModel(TFWhisperPreTrainedModel):
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
decoder_inputs_embeds: Optional[tuple[Union[np.ndarray, tf.Tensor]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
encoder_outputs: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: tuple[np.ndarray | tf.Tensor] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFSeq2SeqModelOutput]:
) -> tuple[tf.Tensor] | TFSeq2SeqModelOutput:
r"""
Returns:
@ -1385,16 +1384,16 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua
head_mask: np.ndarray | tf.Tensor | None = None,
decoder_head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
encoder_outputs: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
decoder_inputs_embeds: Optional[tuple[Union[np.ndarray, tf.Tensor]]] = None,
encoder_outputs: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
decoder_inputs_embeds: tuple[np.ndarray | tf.Tensor] | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[tuple[tf.Tensor], TFSeq2SeqLMOutput]:
) -> tuple[tf.Tensor] | TFSeq2SeqLMOutput:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the language modeling loss. Indices should either be in `[0, ..., config.vocab_size]`
@ -1473,15 +1472,15 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua
def generate(
self,
inputs: Optional[tf.Tensor] = None,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[TFLogitsProcessorList] = None,
seed: Optional[list[int]] = None,
return_timestamps: Optional[bool] = None,
task: Optional[str] = None,
language: Optional[str] = None,
is_multilingual: Optional[bool] = None,
prompt_ids: Optional[tf.Tensor] = None,
inputs: tf.Tensor | None = None,
generation_config: GenerationConfig | None = None,
logits_processor: TFLogitsProcessorList | None = None,
seed: list[int] | None = None,
return_timestamps: bool | None = None,
task: str | None = None,
language: str | None = None,
is_multilingual: bool | None = None,
prompt_ids: tf.Tensor | None = None,
return_token_timestamps=None,
**kwargs,
):

View File
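
The `generate()` signature above swaps nine `Optional[...]` annotations for the pipe form. A hedged note (my reading, not something the commit states): code that introspects such signatures still sees ordinary unions, because `typing.get_type_hints()` evaluates the postponed annotation strings. A small sketch with a hypothetical stand-in function:

# Minimal sketch; hypothetical function, needs Python 3.10+ to evaluate the hints.
from __future__ import annotations

import typing

def generate_stub(seed: list[int] | None = None, task: str | None = None) -> bool | None:
    return None

hints = typing.get_type_hints(generate_stub)
assert set(typing.get_args(hints["seed"])) == {list[int], type(None)}
assert hints["task"] == typing.Optional[str]   # the same union either way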

@ -18,7 +18,7 @@ from __future__ import annotations
import math
import random
from typing import Any, Optional, Union
from typing import Any
import numpy as np
import tensorflow as tf
@ -57,7 +57,7 @@ _CONFIG_FOR_DOC = "XGLMConfig"
LARGE_NEGATIVE = -1e8
def create_sinusoidal_positions(num_positions: int, embedding_dim: int, padding_idx: Optional[int]) -> tf.Tensor:
def create_sinusoidal_positions(num_positions: int, embedding_dim: int, padding_idx: int | None) -> tf.Tensor:
half_dim = embedding_dim // 2
emb = math.log(10000) / (half_dim - 1)
emb = tf.exp(tf.range(half_dim, dtype=tf.float32) * -emb)
@ -81,7 +81,7 @@ def create_sinusoidal_positions(num_positions: int, embedding_dim: int, padding_
def _create_position_ids_from_input_ids(
input_ids: tf.Tensor, past_key_values_length: int, padding_idx: Optional[int]
input_ids: tf.Tensor, past_key_values_length: int, padding_idx: int | None
) -> tf.Tensor:
"""
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
@ -94,7 +94,7 @@ def _create_position_ids_from_input_ids(
def _create_position_ids_from_inputs_embeds(
inputs_embeds: tf.Tensor, past_key_values_length: int, padding_idx: Optional[int]
inputs_embeds: tf.Tensor, past_key_values_length: int, padding_idx: int | None
) -> tf.Tensor:
"""
Args:
@ -129,7 +129,7 @@ def _make_causal_mask(input_ids_shape: tf.TensorShape, past_key_values_length: i
# Copied from transformers.models.bart.modeling_tf_bart._expand_mask
def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
def _expand_mask(mask: tf.Tensor, tgt_len: int | None = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
@ -184,7 +184,7 @@ class TFXGLMAttention(keras.layers.Layer):
past_key_value: tuple[tuple[tf.Tensor]] | None = None,
attention_mask: tf.Tensor | None = None,
layer_head_mask: tf.Tensor | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor | None]:
"""Input shape: Batch x Time x Channel"""
@ -356,7 +356,7 @@ class TFXGLMDecoderLayer(keras.layers.Layer):
layer_head_mask: tf.Tensor | None = None,
cross_attn_layer_head_mask: tf.Tensor | None = None,
past_key_value: tuple[tf.Tensor] | None = None,
training: Optional[bool] = False,
training: bool | None = False,
) -> tuple[tf.Tensor, tf.Tensor, tuple[tuple[tf.Tensor]]]:
"""
Args:
@ -459,7 +459,7 @@ class TFXGLMMainLayer(keras.layers.Layer):
config_class = XGLMConfig
def __init__(
self, config: XGLMConfig, embed_tokens: Optional[TFSharedEmbeddings] = None, *inputs, **kwargs: Any
self, config: XGLMConfig, embed_tokens: TFSharedEmbeddings | None = None, *inputs, **kwargs: Any
) -> None:
super().__init__(*inputs, **kwargs)
@ -525,15 +525,15 @@ class TFXGLMMainLayer(keras.layers.Layer):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs: Any,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@ -796,7 +796,7 @@ class TFXGLMModel(TFXGLMPreTrainedModel):
"""
def __init__(
self, config: XGLMConfig, embed_tokens: Optional[TFSharedEmbeddings] = None, *inputs: Any, **kwargs: Any
self, config: XGLMConfig, embed_tokens: TFSharedEmbeddings | None = None, *inputs: Any, **kwargs: Any
) -> None:
super().__init__(config, *inputs, **kwargs)
@ -818,15 +818,15 @@ class TFXGLMModel(TFXGLMPreTrainedModel):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs: Any,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
@ -872,7 +872,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
]
def __init__(
self, config: XGLMConfig, embed_tokens: Optional[TFSharedEmbeddings] = None, *inputs: Any, **kwargs: Any
self, config: XGLMConfig, embed_tokens: TFSharedEmbeddings | None = None, *inputs: Any, **kwargs: Any
) -> None:
super().__init__(config, *inputs, **kwargs)
@ -929,16 +929,16 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
labels: np.ndarray | tf.Tensor | None = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
**kwargs: Any,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
labels (`np.ndarray` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set

View File
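
Several XGLM helpers above, for example `create_sinusoidal_positions(..., padding_idx: int | None)`, keep parameters that are nullable but still required. A hedged reminder (illustrative function, not taken from the diff) that dropping `Optional` changes nothing here either: `| None` only widens the accepted type and never supplies a default.

# Minimal sketch; illustrative function, not taken from the diff.
from __future__ import annotations

def positions(num_positions: int, padding_idx: int | None) -> int:
    # None must still be passed explicitly; the annotation adds no default.
    return num_positions if padding_idx is None else num_positions + 1

assert positions(4, None) == 4
assert positions(4, 1) == 5
# positions(4) would raise TypeError: missing required positional argument 'padding_idx'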

@ -21,7 +21,6 @@ from __future__ import annotations
import itertools
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -400,7 +399,7 @@ class TFXLMMainLayer(keras.layers.Layer):
output_hidden_states=None,
return_dict=None,
training=False,
) -> Union[TFBaseModelOutput, tuple[tf.Tensor]]:
) -> TFBaseModelOutput | tuple[tf.Tensor]:
# removed: src_enc=None, src_len=None
if input_ids is not None and inputs_embeds is not None:
@ -599,7 +598,7 @@ class TFXLMWithLMHeadModelOutput(ModelOutput):
heads.
"""
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -881,14 +880,14 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFXLMWithLMHeadModelOutput, tuple[tf.Tensor]]:
) -> TFXLMWithLMHeadModelOutput | tuple[tf.Tensor]:
transformer_outputs = self.transformer(
input_ids=input_ids,
attention_mask=attention_mask,
@ -957,15 +956,15 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1068,15 +1067,15 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
if input_ids is not None:
num_choices = shape_list(input_ids)[1]
seq_length = shape_list(input_ids)[2]
@ -1184,15 +1183,15 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1273,16 +1272,16 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL
token_type_ids: np.ndarray | tf.Tensor | None = None,
position_ids: np.ndarray | tf.Tensor | None = None,
lengths: np.ndarray | tf.Tensor | None = None,
cache: Optional[dict[str, tf.Tensor]] = None,
cache: dict[str, tf.Tensor] | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -19,7 +19,6 @@ from __future__ import annotations
import math
import warnings
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -686,12 +685,12 @@ class TFXLMRobertaEncoder(keras.layers.Layer):
encoder_hidden_states: tf.Tensor | None,
encoder_attention_mask: tf.Tensor | None,
past_key_values: tuple[tuple[tf.Tensor]] | None,
use_cache: Optional[bool],
use_cache: bool | None,
output_attentions: bool,
output_hidden_states: bool,
return_dict: bool,
training: bool = False,
) -> Union[TFBaseModelOutputWithPastAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPastAndCrossAttentions | tuple[tf.Tensor]:
all_hidden_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
@ -800,13 +799,13 @@ class TFXLMRobertaMainLayer(keras.layers.Layer):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, tuple[tf.Tensor]]:
) -> TFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]:
if not self.config.is_decoder:
use_cache = False
@ -1000,13 +999,13 @@ class TFXLMRobertaModel(TFXLMRobertaPreTrainedModel):
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
training: Optional[bool] = False,
) -> Union[tuple, TFBaseModelOutputWithPoolingAndCrossAttentions]:
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool | None = False,
) -> tuple | TFBaseModelOutputWithPoolingAndCrossAttentions:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1153,12 +1152,12 @@ class TFXLMRobertaForMaskedLM(TFXLMRobertaPreTrainedModel, TFMaskedLanguageModel
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMaskedLMOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMaskedLMOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
@ -1261,14 +1260,14 @@ class TFXLMRobertaForCausalLM(TFXLMRobertaPreTrainedModel, TFCausalLanguageModel
inputs_embeds: np.ndarray | tf.Tensor | None = None,
encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
past_key_values: Optional[tuple[tuple[Union[np.ndarray, tf.Tensor]]]] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
past_key_values: tuple[tuple[np.ndarray | tf.Tensor]] | None = None,
use_cache: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFCausalLMOutputWithCrossAttentions, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFCausalLMOutputWithCrossAttentions | tuple[tf.Tensor]:
r"""
encoder_hidden_states (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@ -1421,12 +1420,12 @@ class TFXLMRobertaForSequenceClassification(TFXLMRobertaPreTrainedModel, TFSeque
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFSequenceClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFSequenceClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@ -1513,12 +1512,12 @@ class TFXLMRobertaForMultipleChoice(TFXLMRobertaPreTrainedModel, TFMultipleChoic
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFMultipleChoiceModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFMultipleChoiceModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@ -1622,12 +1621,12 @@ class TFXLMRobertaForTokenClassification(TFXLMRobertaPreTrainedModel, TFTokenCla
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFTokenClassifierOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFTokenClassifierOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@ -1713,13 +1712,13 @@ class TFXLMRobertaForQuestionAnswering(TFXLMRobertaPreTrainedModel, TFQuestionAn
position_ids: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: Optional[bool] = False,
) -> Union[TFQuestionAnsweringModelOutput, tuple[tf.Tensor]]:
training: bool | None = False,
) -> TFQuestionAnsweringModelOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.

View File

@ -21,7 +21,6 @@ from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import Optional, Union
import numpy as np
import tensorflow as tf
@ -201,7 +200,7 @@ class TFXLNetRelativeAttention(keras.layers.Layer):
mems: np.ndarray | tf.Tensor | None = None,
target_mapping: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
):
if g is not None:
@ -390,7 +389,7 @@ class TFXLNetLayer(keras.layers.Layer):
mems: np.ndarray | tf.Tensor | None = None,
target_mapping: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
output_attentions: Optional[bool] = False,
output_attentions: bool | None = False,
training: bool = False,
):
outputs = self.rel_attn(
@ -631,10 +630,10 @@ class TFXLNetMainLayer(keras.layers.Layer):
input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
):
if training and use_mems is None:
@ -863,7 +862,7 @@ class TFXLNetModelOutput(ModelOutput):
heads.
"""
last_hidden_state: Optional[tf.Tensor] = None
last_hidden_state: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -900,7 +899,7 @@ class TFXLNetLMHeadModelOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -934,7 +933,7 @@ class TFXLNetForSequenceClassificationOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -968,7 +967,7 @@ class TFXLNetForTokenClassificationOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -1004,7 +1003,7 @@ class TFXLNetForMultipleChoiceOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
logits: Optional[tf.Tensor] = None
logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -1040,8 +1039,8 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput):
"""
loss: tf.Tensor | None = None
start_logits: Optional[tf.Tensor] = None
end_logits: Optional[tf.Tensor] = None
start_logits: tf.Tensor | None = None
end_logits: tf.Tensor | None = None
mems: list[tf.Tensor] | None = None
hidden_states: tuple[tf.Tensor, ...] | None = None
attentions: tuple[tf.Tensor, ...] | None = None
@ -1189,12 +1188,12 @@ class TFXLNetModel(TFXLNetPreTrainedModel):
input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
training: bool = False,
) -> Union[TFXLNetModelOutput, tuple[tf.Tensor]]:
) -> TFXLNetModelOutput | tuple[tf.Tensor]:
outputs = self.transformer(
input_ids=input_ids,
attention_mask=attention_mask,
@@ -1297,13 +1296,13 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):
input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFXLNetLMHeadModelOutput, tuple[tf.Tensor]]:
) -> TFXLNetLMHeadModelOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,
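# The call-signature hunks follow the same pattern: Optional[bool] keyword arguments become
# bool | None, and the Union[...] return annotation becomes a `|` union. A hedged,
# self-contained sketch with hypothetical names, not the model's actual `call` implementation:
from __future__ import annotations


def call_sketch(
    output_attentions: bool | None = None,  # previously Optional[bool]
    return_dict: bool | None = None,        # previously Optional[bool]
) -> dict | tuple:                          # previously Union[dict, tuple]
    if return_dict:
        return {"output_attentions": output_attentions}
    return (output_attentions,)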
@@ -1432,13 +1431,13 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFXLNetForSequenceClassificationOutput, tuple[tf.Tensor]]:
) -> TFXLNetForSequenceClassificationOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -1533,13 +1532,13 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
target_mapping: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFXLNetForMultipleChoiceOutput, tuple[tf.Tensor]]:
) -> TFXLNetForMultipleChoiceOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1647,13 +1646,13 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
labels: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFXLNetForTokenClassificationOutput, tuple[tf.Tensor]]:
) -> TFXLNetForTokenClassificationOutput | tuple[tf.Tensor]:
r"""
labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
@@ -1737,14 +1736,14 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
input_mask: np.ndarray | tf.Tensor | None = None,
head_mask: np.ndarray | tf.Tensor | None = None,
inputs_embeds: np.ndarray | tf.Tensor | None = None,
use_mems: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
use_mems: bool | None = None,
output_attentions: bool | None = None,
output_hidden_states: bool | None = None,
return_dict: bool | None = None,
start_positions: np.ndarray | tf.Tensor | None = None,
end_positions: np.ndarray | tf.Tensor | None = None,
training: bool = False,
) -> Union[TFXLNetForQuestionAnsweringSimpleOutput, tuple[tf.Tensor]]:
) -> TFXLNetForQuestionAnsweringSimpleOutput | tuple[tf.Tensor]:
r"""
start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.