More PYUP fixes (#38883)

More pyup fixes

Signed-off-by: cyy <cyyever@outlook.com>
Yuanyuan Chen 2025-06-18 21:38:08 +08:00 committed by GitHub
parent 12d4c5b66f
commit 1fc67a25c6
150 changed files with 273 additions and 355 deletions

View File

@ -710,8 +710,8 @@ class AssistantToTargetTranslator:
assistant_model: Optional["PreTrainedModel"] = None,
assistant_prune_lm_head: bool = False,
):
self._target_tokenizer: "PreTrainedTokenizerBase" = target_tokenizer
self._assistant_tokenizer: "PreTrainedTokenizerBase" = assistant_tokenizer
self._target_tokenizer: PreTrainedTokenizerBase = target_tokenizer
self._assistant_tokenizer: PreTrainedTokenizerBase = assistant_tokenizer
self._assistant_model_device: str = (
assistant_model_device if assistant_model is None else assistant_model.device
)
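
The hunk above drops the quotes around type annotations. That rewrite is only safe when the annotated name is importable at runtime or when annotation evaluation is postponed; a minimal sketch of the equivalence, using a stand-in class instead of the real tokenizer types:

from __future__ import annotations  # PEP 563: annotations are no longer evaluated eagerly


class Tokenizer:  # stand-in for PreTrainedTokenizerBase, assumed importable here
    pass


class Translator:
    def __init__(self, target_tokenizer: Tokenizer) -> None:
        # "Tokenizer" (quoted) and Tokenizer (bare) are interchangeable as annotations
        # once evaluation is deferred or the name exists at module level.
        self._target_tokenizer: Tokenizer = target_tokenizer


translator = Translator(Tokenizer())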

View File

@ -72,7 +72,7 @@ class TextStreamer(BaseStreamer):
```
"""
def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, **decode_kwargs):
def __init__(self, tokenizer: AutoTokenizer, skip_prompt: bool = False, **decode_kwargs):
self.tokenizer = tokenizer
self.skip_prompt = skip_prompt
self.decode_kwargs = decode_kwargs
@ -206,7 +206,7 @@ class TextIteratorStreamer(TextStreamer):
"""
def __init__(
self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
):
super().__init__(tokenizer, skip_prompt, **decode_kwargs)
self.text_queue = Queue()
@ -284,7 +284,7 @@ class AsyncTextIteratorStreamer(TextStreamer):
"""
def __init__(
self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
):
super().__init__(tokenizer, skip_prompt, **decode_kwargs)
self.text_queue = asyncio.Queue()

View File

@ -4723,7 +4723,7 @@ class GenerationMixin(ContinuousMixin):
)
if return_dict_in_generate and output_scores:
beam_indices = tuple((beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices))))
beam_indices = tuple(beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices)))
# increase cur_len
cur_len = cur_len + 1
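
The rewrite above is purely syntactic: a generator expression that is the sole argument of a call needs no extra parentheses. A self-contained illustration with toy beam data:

beam_indices = ((0,), (1,), (2,))
beam_idx = [2, 0, 1]

# Both spellings build the same tuple; the inner parentheses add nothing.
with_parens = tuple((beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices))))
without_parens = tuple(beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices)))
assert with_parens == without_parens == ((2, 2), (0, 0), (1, 1))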

View File

@ -1626,8 +1626,8 @@ class NeptuneCallback(TrainerCallback):
target_path = consistent_checkpoint_path
except OSError as e:
logger.warning(
"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'. "
"Could fail trying to upload.".format(e)
f"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{e}'. "
"Could fail trying to upload."
)
self._metadata_namespace[self._target_checkpoints_namespace].upload_files(target_path)
@ -1976,9 +1976,7 @@ class ClearMLCallback(TrainerCallback):
)
except Exception as e:
logger.warning(
"Could not remove checkpoint `{}` after going over the `save_total_limit`. Error is: {}".format(
self._checkpoints_saved[0].name, e
)
f"Could not remove checkpoint `{self._checkpoints_saved[0].name}` after going over the `save_total_limit`. Error is: {e}"
)
break
self._checkpoints_saved = self._checkpoints_saved[1:]
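
Both callback hunks are plain str.format to f-string conversions; the rendered message is byte-for-byte identical. A minimal sketch with a hypothetical checkpoint name and error:

checkpoint_name = "checkpoint-500"        # hypothetical checkpoint name
error = OSError("disk quota exceeded")    # hypothetical I/O error

old_style = "Could not remove checkpoint `{}`. Error is: {}".format(checkpoint_name, error)
new_style = f"Could not remove checkpoint `{checkpoint_name}`. Error is: {error}"
assert old_style == new_style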

View File

@ -1409,10 +1409,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT
def prepare_tf_dataset(
self,
dataset: "datasets.Dataset", # noqa:F821
dataset: datasets.Dataset, # noqa:F821
batch_size: int = 8,
shuffle: bool = True,
tokenizer: Optional["PreTrainedTokenizerBase"] = None,
tokenizer: Optional[PreTrainedTokenizerBase] = None,
collate_fn: Optional[Callable] = None,
collate_fn_args: Optional[dict[str, Any]] = None,
drop_remainder: Optional[bool] = None,

View File

@ -4424,10 +4424,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMi
raise ValueError("DeepSpeed Zero-3 is not compatible with passing a `device_map`.")
if not is_accelerate_available():
raise ValueError(
(
"Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` "
"requires `accelerate`. You can install it with `pip install accelerate`"
)
"Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` "
"requires `accelerate`. You can install it with `pip install accelerate`"
)
# handling bnb config from kwargs, remove after `load_in_{4/8}bit` deprecation.
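
The only change here is dropping a redundant pair of parentheses: adjacent string literals are concatenated at compile time whether or not they are wrapped again inside the call. A tiny demonstration:

message_wrapped = ValueError(
    (
        "Using a `device_map` "
        "requires `accelerate`."
    )
)
message_plain = ValueError(
    "Using a `device_map` "
    "requires `accelerate`."
)
# Implicit concatenation happens either way; the extra parentheses are pure noise.
assert str(message_wrapped) == str(message_plain) == "Using a `device_map` requires `accelerate`."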

View File

@ -203,7 +203,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
pieces = self.sp_model.encode(text, out_type=str)
new_pieces = []
for piece in pieces:
if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
# Logic to handle special cases see https://github.com/google-research/bert/blob/master/README.md#tokenization
# `9,9` -> ['▁9', ',', '9'] instead of [`_9,`, '9']
cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
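
Calling str() on a string literal returns the literal unchanged, so comparing against str(",") and against "," is the same test; the rewrite just removes a pointless constructor call. Sketch:

piece = "9,"
assert str(",") == ","                            # str() on a str literal is a no-op
assert (piece[-1] == str(",")) is (piece[-1] == ",")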

View File

@ -830,7 +830,7 @@ class BambaMixer(nn.Module):
# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

View File

@ -632,7 +632,7 @@ class BambaMixer(nn.Module):
# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

View File

@ -32,7 +32,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
# See all BART models at https://huggingface.co/models?filter=bart
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
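
Since Python 3.8, functools.lru_cache can be applied as a bare decorator; @lru_cache and @lru_cache() both install the default cache (maxsize 128). A sketch with trivial stand-in functions:

from functools import lru_cache


@lru_cache          # Python 3.8+: bare decorator, same as @lru_cache()
def cached_square(x: int) -> int:
    return x * x


@lru_cache()        # older spelling that pyupgrade rewrites
def cached_cube(x: int) -> int:
    return x * x * x


assert cached_square(3) == 9 and cached_cube(3) == 27
assert cached_square.cache_info().maxsize == cached_cube.cache_info().maxsize == 128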

View File

@ -110,7 +110,7 @@ class BeitDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
# Based on timm implementation, which can be found here:
@ -513,8 +513,8 @@ class BeitLayer(nn.Module):
init_values = config.layer_scale_init_value
if init_values > 0:
self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
else:
self.lambda_1, self.lambda_2 = None, None
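
In torch.ones((config.hidden_size)) the inner parentheses are only grouping, not a tuple, so they can be dropped without touching the result. A short check, assuming a local PyTorch install:

import torch

hidden_size = 4
a = torch.ones((hidden_size))   # parentheses are just grouping: same as torch.ones(4)
b = torch.ones(hidden_size)
c = torch.ones((hidden_size,))  # an actual 1-tuple gives the same 1-D shape as well
assert a.shape == b.shape == c.shape == (hidden_size,)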

View File

@ -934,7 +934,7 @@ class SentencepieceTokenizer:
pieces = self.sp_model.encode(text, out_type=str)
new_pieces = []
for piece in pieces:
if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
if len(cur_pieces[0]) == 1:

View File

@ -115,7 +115,7 @@ class Dictionary:
except FileNotFoundError as fnfe:
raise fnfe
except UnicodeError:
raise Exception("Incorrect encoding detected in {}, please rebuild the dataset".format(f))
raise Exception(f"Incorrect encoding detected in {f}, please rebuild the dataset")
return
lines = f.readlines()
@ -133,11 +133,11 @@ class Dictionary:
word = line
if word in self and not overwrite:
raise RuntimeError(
"Duplicate word found when loading Dictionary: '{}'. "
f"Duplicate word found when loading Dictionary: '{word}'. "
"Duplicate words can overwrite earlier ones by adding the "
"#fairseq:overwrite flag at the end of the corresponding row "
"in the dictionary file. If using the Camembert model, please "
"download an updated copy of the model file.".format(word)
"download an updated copy of the model file."
)
self.add_symbol(word, n=count, overwrite=overwrite)
except ValueError:

View File

@ -310,7 +310,7 @@ class BitDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
def make_div(value, divisor=8):

View File

@ -35,7 +35,7 @@ VOCAB_FILES_NAMES = {
}
@lru_cache()
@lru_cache
# Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
def bytes_to_unicode():
"""

View File

@ -641,9 +641,7 @@ class BlipTextModel(BlipTextPreTrainedModel):
extended_attention_mask = attention_mask[:, None, None, :]
else:
raise ValueError(
"Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
input_shape, attention_mask.shape
)
f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
)
# Since attention_mask is 1.0 for positions we want to attend and 0.0 for
@ -723,7 +721,7 @@ class BlipTextModel(BlipTextPreTrainedModel):
past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
if attention_mask is None:
attention_mask = torch.ones(((batch_size, seq_length + past_key_values_length))).to(device)
attention_mask = torch.ones((batch_size, seq_length + past_key_values_length)).to(device)
# We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
# ourselves in which case we just need to make it broadcastable to all heads.

View File

@ -800,9 +800,7 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
extended_attention_mask = attention_mask[:, None, None, :]
else:
raise ValueError(
"Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
input_shape, attention_mask.shape
)
f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
)
# Since attention_mask is 1.0 for positions we want to attend and 0.0 for
@ -881,7 +879,7 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
if attention_mask is None:
attention_mask = tf.ones(((batch_size, seq_length + past_key_values_length)))
attention_mask = tf.ones((batch_size, seq_length + past_key_values_length))
# We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
# ourselves in which case we just need to make it broadcastable to all heads.

View File

@ -1144,9 +1144,7 @@ class Blip2QFormerModel(Blip2PreTrainedModel):
extended_attention_mask = attention_mask[:, None, None, :]
else:
raise ValueError(
"Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
input_shape, attention_mask.shape
)
f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
)
# Since attention_mask is 1.0 for positions we want to attend and 0.0 for

View File

@ -98,7 +98,7 @@ def convert_bloom_checkpoint_to_pytorch(
config = BloomConfig()
for j, file in enumerate(file_names):
print("Processing file: {}".format(file))
print(f"Processing file: {file}")
tensors = None
for i in range(pretraining_tp):
@ -132,7 +132,7 @@ def convert_bloom_checkpoint_to_pytorch(
tensors,
os.path.join(
pytorch_dump_folder_path,
"pytorch_model_{}-of-{}.bin".format(str(j + 1).zfill(5), str(len(file_names)).zfill(5)),
f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(len(file_names)).zfill(5)}.bin",
),
)
@ -140,8 +140,8 @@ def convert_bloom_checkpoint_to_pytorch(
value = tensors[key]
total_size += value.numel() * get_dtype_size(value.dtype)
if key not in index_dict["weight_map"]:
index_dict["weight_map"][key] = "pytorch_model_{}-of-{}.bin".format(
str(j + 1).zfill(5), str(len(file_names)).zfill(5)
index_dict["weight_map"][key] = (
f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(len(file_names)).zfill(5)}.bin"
)
config = BloomConfig()
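
The shard-name hunks are the same str.format to f-string rewrite, with the zfill calls moved inside the braces; the file names come out identical. Shown with hypothetical shard numbers:

j, num_files = 0, 72   # hypothetical shard index and shard count

old_name = "pytorch_model_{}-of-{}.bin".format(str(j + 1).zfill(5), str(num_files).zfill(5))
new_name = f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(num_files).zfill(5)}.bin"
assert old_name == new_name == "pytorch_model_00001-of-00072.bin"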

View File

@ -610,7 +610,7 @@ class ClapAudioLayer(nn.Module):
mask_windows = window_partition(img_mask, self.window_size)
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
else:
attn_mask = None
return attn_mask
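
float(-100.0) and float(0.0) are already float literals, so the casts are dead weight; masked_fill sees the same scalars either way. A minimal sketch, again assuming PyTorch:

import torch

attn_mask = torch.tensor([[0.0, 1.0], [2.0, 0.0]])
with_cast = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
without_cast = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
assert torch.equal(with_cast, without_cast)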

View File

@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {
}
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
@ -488,7 +488,7 @@ class CLIPTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@ -506,8 +506,8 @@ class CLIPTokenizer(PreTrainedTokenizer):
for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning(
"Saving vocabulary to {}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!".format(merge_file)
f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
" Please check that the tokenizer is not corrupted!"
)
index = token_index
writer.write(" ".join(bpe_tokens) + "\n")

View File

@ -181,7 +181,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
if missing_keys != ["clip.text_model.embeddings.position_ids", "clip.vision_model.embeddings.position_ids"]:
raise ValueError("Missing keys that are not expected: {}".format(missing_keys))
raise ValueError(f"Missing keys that are not expected: {missing_keys}")
if unexpected_keys != ["decoder.reduce.weight", "decoder.reduce.bias"]:
raise ValueError(f"Unexpected keys: {unexpected_keys}")

View File

@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {
}
@lru_cache()
@lru_cache
# Copied from transformers.models.gpt2.tokenization_gpt2.bytes_to_unicode
def bytes_to_unicode():
"""

View File

@ -42,7 +42,7 @@ VOCAB_FILES_NAMES = {
}
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

View File

@ -70,7 +70,7 @@ class ConvNextDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class ConvNextLayerNorm(nn.Module):
@ -149,7 +149,7 @@ class ConvNextLayer(nn.Module):
self.act = ACT2FN[config.hidden_act]
self.pwconv2 = nn.Linear(4 * dim, dim)
self.layer_scale_parameter = (
nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
if config.layer_scale_init_value > 0
else None
)

View File

@ -70,7 +70,7 @@ class ConvNextV2DropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class ConvNextV2GRN(nn.Module):

View File

@ -207,7 +207,7 @@ class CpmTokenizer(PreTrainedTokenizer):
pieces = self.sp_model.encode(text, out_type=str)
new_pieces = []
for piece in pieces:
if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
if len(cur_pieces[0]) == 1:

View File

@ -86,7 +86,7 @@ class CvtDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class CvtEmbeddings(nn.Module):

View File

@ -187,7 +187,7 @@ class DFineMultiscaleDeformableAttention(nn.Module):
sampling_locations = reference_points[:, :, None, :, :2] + offset
else:
raise ValueError(
"Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1])
f"Last dim of reference_points must be 2 or 4, but get {reference_points.shape[-1]} instead."
)
output = self.ms_deformable_attn_core(

View File

@ -517,7 +517,7 @@ class DFineMultiscaleDeformableAttention(nn.Module):
sampling_locations = reference_points[:, :, None, :, :2] + offset
else:
raise ValueError(
"Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1])
f"Last dim of reference_points must be 2 or 4, but get {reference_points.shape[-1]} instead."
)
output = self.ms_deformable_attn_core(

View File

@ -384,7 +384,7 @@ def gen_sine_position_embeddings(pos_tensor, hidden_size=256):
pos = torch.cat((pos_y, pos_x, pos_w, pos_h), dim=2)
else:
raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1)))
raise ValueError(f"Unknown pos_tensor shape(-1):{pos_tensor.size(-1)}")
return pos
@ -1254,7 +1254,7 @@ class DabDetrModel(DabDetrPreTrainedModel):
self.num_patterns = config.num_patterns
if not isinstance(self.num_patterns, int):
logger.warning("num_patterns should be int but {}".format(type(self.num_patterns)))
logger.warning(f"num_patterns should be int but {type(self.num_patterns)}")
self.num_patterns = 0
if self.num_patterns > 0:
self.patterns = nn.Embedding(self.num_patterns, self.hidden_size)

View File

@ -157,24 +157,12 @@ def recursively_load_weights(orig_dict, hf_model, model_name):
elif len(mapped_key) == 3:
integers = re.findall(r"\b\d+\b", name)
if mapped_key[0][0] == "d":
mapped_key = "{}.{}.{}{}.{}".format(
mapped_key[0],
str(int(integers[0]) - 1),
mapped_key[1],
str(int(integers[1]) - 1),
mapped_key[2],
)
mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}{str(int(integers[1]) - 1)}.{mapped_key[2]}"
else:
mapped_key = "{}.{}.{}{}.{}".format(
mapped_key[0],
str(int(integers[0]) - 1),
mapped_key[1],
str(int(integers[1]) + 1),
mapped_key[2],
)
mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}{str(int(integers[1]) + 1)}.{mapped_key[2]}"
elif len(mapped_key) == 2:
integers = re.findall(r"\b\d+\b", name)
mapped_key = "{}.{}.{}".format(mapped_key[0], str(int(integers[0]) - 1), mapped_key[1])
mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}"
is_used = True
if "weight_g" in name:

View File

@ -185,18 +185,12 @@ def load_beit_model(args, is_finetuned, is_large):
missing_keys = warn_missing_keys
if len(missing_keys) > 0:
print(
"Weights of {} not initialized from pretrained model: {}".format(
model.__class__.__name__, missing_keys
)
)
print(f"Weights of {model.__class__.__name__} not initialized from pretrained model: {missing_keys}")
if len(unexpected_keys) > 0:
print("Weights from pretrained model not used in {}: {}".format(model.__class__.__name__, unexpected_keys))
print(f"Weights from pretrained model not used in {model.__class__.__name__}: {unexpected_keys}")
if len(ignore_missing_keys) > 0:
print(
"Ignored weights of {} not initialized from pretrained model: {}".format(
model.__class__.__name__, ignore_missing_keys
)
f"Ignored weights of {model.__class__.__name__} not initialized from pretrained model: {ignore_missing_keys}"
)
if len(error_msgs) > 0:
print("\n".join(error_msgs))

View File

@ -101,7 +101,7 @@ class Data2VecVisionDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
# Copied from transformers.models.beit.modeling_beit.BeitEmbeddings with Beit->Data2VecVision
@ -515,8 +515,8 @@ class Data2VecVisionLayer(nn.Module):
init_values = config.layer_scale_init_value
if init_values > 0:
self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
else:
self.lambda_1, self.lambda_2 = None, None

View File

@ -306,7 +306,7 @@ class TFData2VecVisionSelfAttention(keras.layers.Layer):
hidden_states: tf.Tensor,
head_mask: tf.Tensor,
output_attentions: bool,
relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
training: bool = False,
) -> tuple[tf.Tensor]:
batch_size = shape_list(hidden_states)[0]
@ -416,7 +416,7 @@ class TFData2VecVisionAttention(keras.layers.Layer):
input_tensor: tf.Tensor,
head_mask: tf.Tensor,
output_attentions: bool,
relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
training: bool = False,
) -> tuple[tf.Tensor]:
self_outputs = self.attention(
@ -538,8 +538,8 @@ class TFData2VecVisionLayer(keras.layers.Layer):
trainable=True,
name="lambda_2",
)
self.lambda_1.assign(self.init_values * tf.ones((self.config.hidden_size)))
self.lambda_2.assign(self.init_values * tf.ones((self.config.hidden_size)))
self.lambda_1.assign(self.init_values * tf.ones(self.config.hidden_size))
self.lambda_2.assign(self.init_values * tf.ones(self.config.hidden_size))
else:
self.lambda_1, self.lambda_2 = None, None
@ -570,7 +570,7 @@ class TFData2VecVisionLayer(keras.layers.Layer):
hidden_states: tf.Tensor,
head_mask: tf.Tensor,
output_attentions: bool,
relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None,
relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
training: bool = False,
) -> tuple[tf.Tensor]:
self_attention_outputs = self.attention(

View File

@ -113,7 +113,7 @@ class DeepseekV3TopkRouter(nn.Module):
self.norm_topk_prob = config.norm_topk_prob
self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size)))
self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts)))
self.register_buffer("e_score_correction_bias", torch.zeros(self.n_routed_experts))
@torch.no_grad()
def get_topk_indices(self, scores):

View File

@ -110,7 +110,7 @@ class DeepseekV3TopkRouter(nn.Module):
self.norm_topk_prob = config.norm_topk_prob
self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size)))
self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts)))
self.register_buffer("e_score_correction_bias", torch.zeros(self.n_routed_experts))
@torch.no_grad()
def get_topk_indices(self, scores):

View File

@ -270,7 +270,7 @@ class EfficientFormerDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class EfficientFormerFlat(nn.Module):
@ -303,8 +303,8 @@ class EfficientFormerMeta3D(nn.Module):
self.drop_path = EfficientFormerDropPath(drop_path) if drop_path > 0.0 else nn.Identity()
self.use_layer_scale = config.use_layer_scale
if config.use_layer_scale:
self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
def forward(self, hidden_states: torch.Tensor, output_attentions: bool = False) -> tuple[torch.Tensor]:
self_attention_outputs = self.token_mixer(self.layernorm1(hidden_states), output_attentions)
@ -370,8 +370,8 @@ class EfficientFormerMeta4D(nn.Module):
self.drop_path = EfficientFormerDropPath(drop_path) if drop_path > 0.0 else nn.Identity()
self.use_layer_scale = config.use_layer_scale
if config.use_layer_scale:
self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor]:
outputs = self.token_mixer(hidden_states)

View File

@ -14,7 +14,6 @@
# limitations under the License.
"""Tokenization classes for Ernie-M."""
import io
import os
import unicodedata
from typing import Any, Optional
@ -172,7 +171,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
def clean_text(self, text):
"""Performs invalid character removal and whitespace cleanup on text."""
return "".join((self.SP_CHAR_MAPPING.get(c, c) for c in text))
return "".join(self.SP_CHAR_MAPPING.get(c, c) for c in text)
def _tokenize(self, text, enable_sampling=False, nbest_size=64, alpha=0.1):
"""Tokenize a string."""
@ -373,7 +372,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
def load_vocab(self, filepath):
token_to_idx = {}
with io.open(filepath, "r", encoding="utf-8") as f:
with open(filepath, "r", encoding="utf-8") as f:
for index, line in enumerate(f):
token = line.rstrip("\n")
token_to_idx[token] = int(index)
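
On Python 3, io.open is the builtin open, so both the io prefix and the now-unused import can go. A quick check plus the equivalent vocab-loading pattern, written against a throwaway temp file rather than a real vocab file:

import io
import os
import tempfile

assert io.open is open  # Python 3: io.open is simply an alias for the builtin open

with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".vocab", delete=False) as tmp:
    tmp.write("hello\nworld\n")
    path = tmp.name

token_to_idx = {}
with open(path, "r", encoding="utf-8") as f:      # no io. prefix needed
    for index, line in enumerate(f):
        token_to_idx[line.rstrip("\n")] = int(index)

os.remove(path)
assert token_to_idx == {"hello": 0, "world": 1}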

View File

@ -118,7 +118,7 @@ class MegaSimpleRelativePositionalBias(nn.Module):
def forward(self, seq_len):
if seq_len > self.max_positions:
raise ValueError("Sequence length {} going beyond max length {}".format(seq_len, self.max_positions))
raise ValueError(f"Sequence length {seq_len} going beyond max length {self.max_positions}")
# seq_len * 2 - 1
bias = self.rel_pos_bias[(self.max_positions - seq_len) : (self.max_positions + seq_len - 1)]
@ -298,7 +298,7 @@ class MegaSequenceNorm(nn.Module):
elif norm_type == "syncbatchnorm":
self.norm = nn.SyncBatchNorm(embedding_dim, eps=eps, affine=affine)
else:
raise ValueError("Unknown norm type: {}".format(norm_type))
raise ValueError(f"Unknown norm type: {norm_type}")
def forward(self, input):
if isinstance(self.norm, nn.modules.batchnorm._BatchNorm):
@ -563,7 +563,7 @@ class MegaGatedCrossAttention(nn.Module):
elif self.config.relative_positional_bias == "rotary":
self.rel_pos_bias = MegaRotaryRelativePositionalBias(config)
else:
raise ValueError("unknown relative position bias: {}".format(self.config.relative_positional_bias))
raise ValueError(f"unknown relative position bias: {self.config.relative_positional_bias}")
self.softmax = nn.Softmax(dim=-1)

View File

@ -287,7 +287,7 @@ class NatDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class NeighborhoodAttention(nn.Module):

View File

@ -99,7 +99,7 @@ TAPEX_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
"""
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

View File

@ -79,7 +79,7 @@ class VanDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class VanOverlappingPatchEmbedder(nn.Module):
@ -204,7 +204,7 @@ class VanLayerScaling(nn.Module):
def __init__(self, hidden_size: int, initial_value: float = 1e-2):
super().__init__()
self.weight = nn.Parameter(initial_value * torch.ones((hidden_size)), requires_grad=True)
self.weight = nn.Parameter(initial_value * torch.ones(hidden_size), requires_grad=True)
def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
# unsqueezing for broadcasting

View File

@ -275,7 +275,7 @@ class DinatDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class NeighborhoodAttention(nn.Module):

View File

@ -343,7 +343,7 @@ class Dinov2DropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class Dinov2MLP(nn.Module):

View File

@ -360,7 +360,7 @@ class Dinov2WithRegistersDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class Dinov2WithRegistersMLP(nn.Module):

View File

@ -393,7 +393,7 @@ class DonutSwinDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
# Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->DonutSwin
@ -625,7 +625,7 @@ class DonutSwinLayer(nn.Module):
mask_windows = window_partition(img_mask, self.window_size)
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
else:
attn_mask = None
return attn_mask

View File

@ -1414,7 +1414,7 @@ class EsmFoldInvariantPointAttention(nn.Module):
self.linear_b = EsmFoldLinear(c_z, config.num_heads_ipa)
self.head_weights = nn.Parameter(torch.zeros((config.num_heads_ipa)))
self.head_weights = nn.Parameter(torch.zeros(config.num_heads_ipa))
concat_out_dim = config.num_heads_ipa * (c_z + config.ipa_dim + config.num_v_points * 4)
self.linear_out = EsmFoldLinear(concat_out_dim, c_s, init="final")

View File

@ -398,7 +398,7 @@ def map_structure_with_atom_order(in_list: list, first_call: bool = True) -> lis
return in_list
@functools.lru_cache(maxsize=None)
@functools.cache
def load_stereo_chemical_props() -> tuple[
Mapping[str, list[Bond]],
Mapping[str, list[Bond]],

View File

@ -16,7 +16,7 @@
from __future__ import annotations
from collections.abc import Sequence
from functools import lru_cache
from functools import cache
from typing import Any, Callable, Optional
import numpy as np
@ -75,7 +75,7 @@ def rot_vec_mul(r: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
)
@lru_cache(maxsize=None)
@cache
def identity_rot_mats(
batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
@ -90,7 +90,7 @@ def identity_rot_mats(
return rots
@lru_cache(maxsize=None)
@cache
def identity_trans(
batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
@ -101,7 +101,7 @@ def identity_trans(
return trans
@lru_cache(maxsize=None)
@cache
def identity_quats(
batch_dims: tuple[int, ...],
dtype: Optional[torch.dtype] = None,
@ -220,7 +220,7 @@ _CACHED_QUATS: dict[str, np.ndarray] = {
}
@lru_cache(maxsize=None)
@cache
def _get_quat(quat_key: str, dtype: torch.dtype, device: torch.device) -> torch.Tensor:
return torch.tensor(_CACHED_QUATS[quat_key], dtype=dtype, device=device)
@ -1070,7 +1070,7 @@ class Rigid:
e0 = [c / denom for c in e0]
dot = sum((c1 * c2 for c1, c2 in zip(e0, e1)))
e1 = [c2 - c1 * dot for c1, c2 in zip(e0, e1)]
denom = torch.sqrt(sum((c * c for c in e1)) + eps * torch.ones_like(e1[0]))
denom = torch.sqrt(sum(c * c for c in e1) + eps * torch.ones_like(e1[0]))
e1 = [c / denom for c in e1]
e2 = [
e0[1] * e1[2] - e0[2] * e1[1],
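
functools.cache (Python 3.9+) is documented as equivalent to lru_cache(maxsize=None), so the decorator swaps in these ESM hunks change nothing functionally, and the sum((...)) edit is the same sole-argument-generator cleanup seen earlier. Sketch:

from functools import cache, lru_cache


@lru_cache(maxsize=None)        # old spelling
def fib_old(n: int) -> int:
    return n if n < 2 else fib_old(n - 1) + fib_old(n - 2)


@cache                          # Python 3.9+ equivalent
def fib_new(n: int) -> int:
    return n if n < 2 else fib_new(n - 1) + fib_new(n - 2)


assert fib_old(20) == fib_new(20) == 6765
assert fib_old.cache_info().maxsize is None and fib_new.cache_info().maxsize is None

# Same story for sum(): a sole generator argument needs no extra parentheses.
e1 = [1.0, 2.0, 2.0]
assert sum((c * c for c in e1)) == sum(c * c for c in e1) == 9.0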

View File

@ -949,7 +949,7 @@ class FalconH1Mixer(nn.Module):
# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

View File

@ -748,7 +748,7 @@ class FalconH1Mixer(nn.Module):
# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

View File

@ -318,7 +318,7 @@ class FlavaImageProcessor(BaseImageProcessor):
image_processor_dict["codebook_crop_size"] = kwargs.pop("codebook_crop_size")
return super().from_dict(image_processor_dict, **kwargs)
@lru_cache()
@lru_cache
def masking_generator(
self,
input_size_patches,

View File

@ -273,7 +273,7 @@ class FlavaImageProcessorFast(BaseImageProcessorFast):
image_processor_dict["codebook_crop_size"] = kwargs.pop("codebook_crop_size")
return super().from_dict(image_processor_dict, **kwargs)
@lru_cache()
@lru_cache
def masking_generator(
self,
input_size_patches,

View File

@ -1446,7 +1446,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
param.requires_grad = False
def get_codebook_indices(self, pixel_values: torch.Tensor) -> torch.Tensor:
"""
f"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Codebook pixel values can be obtained using [`AutoImageProcessor`] by passing
@ -1458,8 +1458,8 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
>>> import requests
>>> from transformers import AutoImageProcessor, FlavaImageCodebook
>>> model = FlavaImageCodebook.from_pretrained("{0}")
>>> image_processor = AutoImageProcessor.from_pretrained("{0}")
>>> model = FlavaImageCodebook.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")
>>> image_processor = AutoImageProcessor.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
@ -1469,7 +1469,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
>>> outputs = model.get_codebook_indices(**inputs)
```
""".format(_CHECKPOINT_FOR_CODEBOOK_DOC)
"""
z_logits = self.blocks(pixel_values)
return torch.argmax(z_logits, axis=1)
@ -1478,7 +1478,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
return nn.Softmax(dim=1)(z_logits)
def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
"""
f"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Codebook pixel values can be obtained using [`AutoImageProcessor`] by passing
@ -1491,8 +1491,8 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
>>> import requests
>>> from transformers import AutoImageProcessor, FlavaImageCodebook
>>> model = FlavaImageCodebook.from_pretrained("{0}")
>>> image_processor = AutoImageProcessor.from_pretrained("{0}")
>>> model = FlavaImageCodebook.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")
>>> image_processor = AutoImageProcessor.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
@ -1504,7 +1504,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
>>> print(outputs.shape)
(1, 196)
```
""".format(_CHECKPOINT_FOR_CODEBOOK_DOC)
"""
if len(pixel_values.shape) != 4:
raise ValueError(f"input shape {pixel_values.shape} is not 4d")
if pixel_values.shape[1] != self.input_channels:
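
The FLAVA hunks swap a triple-quoted string with a trailing .format(...) for an f-string at the top of each method. The resulting text is the same, and neither spelling is recorded as a docstring (CPython only stores a plain constant string in __doc__), so the change is cosmetic in both respects. A sketch with a hypothetical checkpoint id:

_CHECKPOINT = "some-org/some-codebook"  # hypothetical stand-in for _CHECKPOINT_FOR_CODEBOOK_DOC


def old_style():
    """
    Example: FlavaImageCodebook.from_pretrained("{0}")
    """.format(_CHECKPOINT)


def new_style():
    f"""
    Example: FlavaImageCodebook.from_pretrained("{_CHECKPOINT}")
    """


# Neither variant is stored as a docstring, because neither is a constant string.
assert old_style.__doc__ is None and new_style.__doc__ is None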

View File

@ -177,7 +177,7 @@ class FNetTokenizer(PreTrainedTokenizer):
pieces = self.sp_model.encode(text, out_type=str)
new_pieces = []
for piece in pieces:
if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
if len(cur_pieces[0]) == 1:

View File

@ -293,7 +293,7 @@ class FocalNetDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class FocalNetModulation(nn.Module):
@ -431,8 +431,8 @@ class FocalNetLayer(nn.Module):
self.gamma_1 = 1.0
self.gamma_2 = 1.0
if config.use_layerscale:
self.gamma_1 = nn.Parameter(config.layerscale_value * torch.ones((dim)), requires_grad=True)
self.gamma_2 = nn.Parameter(config.layerscale_value * torch.ones((dim)), requires_grad=True)
self.gamma_1 = nn.Parameter(config.layerscale_value * torch.ones(dim), requires_grad=True)
self.gamma_2 = nn.Parameter(config.layerscale_value * torch.ones(dim), requires_grad=True)
def forward(self, hidden_state, input_dimensions):
height, width = input_dimensions

View File

@ -65,7 +65,7 @@ class GLPNDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
# Copied from transformers.models.segformer.modeling_segformer.SegformerOverlapPatchEmbeddings

View File

@ -33,7 +33,7 @@ VOCAB_FILES_NAMES = {
}
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

View File

@ -757,7 +757,7 @@ class GraniteMoeHybridMambaLayer(nn.Module):
# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

View File

@ -921,7 +921,7 @@ class GroundingDinoDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class GroundingDinoFusionLayer(nn.Module):
@ -937,8 +937,8 @@ class GroundingDinoFusionLayer(nn.Module):
# add layer scale for training stability
self.drop_path = GroundingDinoDropPath(drop_path) if drop_path > 0.0 else nn.Identity()
init_values = 1e-4
self.vision_param = nn.Parameter(init_values * torch.ones((config.d_model)), requires_grad=True)
self.text_param = nn.Parameter(init_values * torch.ones((config.d_model)), requires_grad=True)
self.vision_param = nn.Parameter(init_values * torch.ones(config.d_model), requires_grad=True)
self.text_param = nn.Parameter(init_values * torch.ones(config.d_model), requires_grad=True)
def forward(
self,

View File

@ -459,7 +459,7 @@ class HieraDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class HieraMlp(nn.Module):

View File

@ -203,7 +203,7 @@ def convert_hubert_checkpoint(
config.vocab_size = len(target_dict.symbols)
vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
if not os.path.isdir(pytorch_dump_folder_path):
logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path))
logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory")
return
os.makedirs(pytorch_dump_folder_path, exist_ok=True)
with open(vocab_path, "w", encoding="utf-8") as vocab_handle:

View File

@ -300,12 +300,7 @@ class IdeficsDecoupledEmbedding(nn.Embedding):
return full_vector
def extra_repr(self) -> str:
return "num_embeddings={}, num_additional_embeddings={}, embedding_dim={}, partially_freeze={}".format(
self.num_embeddings,
self.num_additional_embeddings,
self.embedding_dim,
self.partially_freeze,
)
return f"num_embeddings={self.num_embeddings}, num_additional_embeddings={self.num_additional_embeddings}, embedding_dim={self.embedding_dim}, partially_freeze={self.partially_freeze}"
class IdeficsDecoupledLinear(nn.Linear):
@ -364,13 +359,7 @@ class IdeficsDecoupledLinear(nn.Linear):
def extra_repr(self) -> str:
"""Overwriting `nn.Linear.extra_repr` to include new parameters."""
return "in_features={}, out_features={}, out_additional_features={}, bias={}, partially_freeze={}".format(
self.in_features,
self.out_features,
self.out_additional_features,
self.bias is not None,
self.partially_freeze,
)
return f"in_features={self.in_features}, out_features={self.out_features}, out_additional_features={self.out_additional_features}, bias={self.bias is not None}, partially_freeze={self.partially_freeze}"
# this was adapted from LlamaRMSNorm

View File

@ -362,12 +362,7 @@ class TFIdeficsDecoupledEmbedding(tf.keras.layers.Embedding):
return full_vector
def extra_repr(self) -> str:
return "num_embeddings={}, num_additional_embeddings={}, embedding_dim={}, partially_freeze={}".format(
self.num_embeddings,
self.num_additional_embeddings,
self.output_dim,
self.partially_freeze,
)
return f"num_embeddings={self.num_embeddings}, num_additional_embeddings={self.num_additional_embeddings}, embedding_dim={self.output_dim}, partially_freeze={self.partially_freeze}"
class TFIdeficsDecoupledLinear(tf.keras.layers.Layer):
@ -431,13 +426,7 @@ class TFIdeficsDecoupledLinear(tf.keras.layers.Layer):
def extra_repr(self) -> str:
"""Overwriting `nn.Linear.extra_repr` to include new parameters."""
return "in_features={}, out_features={}, out_additional_features={}, bias={}, partially_freeze={}".format(
self.in_features,
self.out_features,
self.out_additional_features,
self.bias is not None,
self.partially_freeze,
)
return f"in_features={self.in_features}, out_features={self.out_features}, out_additional_features={self.out_additional_features}, bias={self.bias is not None}, partially_freeze={self.partially_freeze}"
@classmethod
def from_config(cls, config):

View File

@ -60,14 +60,14 @@ def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path):
)
raise
tf_path = os.path.abspath(imagegpt_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info("Loading TF weight {} with shape {}".format(name, shape))
logger.info(f"Loading TF weight {name} with shape {shape}")
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array.squeeze())
@ -129,7 +129,7 @@ def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path):
e.args += (pointer.shape, array.shape)
raise
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
if name[-1] == "q_proj":
pointer.data[:, : config.n_embd] = torch.from_numpy(array.reshape(config.n_embd, config.n_embd)).T

View File

@ -397,8 +397,8 @@ class InternVLVisionLayer(nn.Module):
self.layernorm_after = NORM2FN[config.norm_type](config.hidden_size, eps=config.layer_norm_eps)
init_values = config.layer_scale_init_value
self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
self.dropout = nn.Dropout(config.hidden_dropout_prob)
def forward(

View File

@ -348,8 +348,8 @@ class InternVLVisionLayer(nn.Module):
self.layernorm_after = NORM2FN[config.norm_type](config.hidden_size, eps=config.layer_norm_eps)
init_values = config.layer_scale_init_value
self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
self.dropout = nn.Dropout(config.hidden_dropout_prob)
def forward(

View File

@ -140,7 +140,7 @@ LAYOUTLMV3_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
"""
@lru_cache()
@lru_cache
# Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
def bytes_to_unicode():
"""

View File

@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
# See all LED models at https://huggingface.co/models?filter=LED
@lru_cache()
@lru_cache
# Copied from transformers.models.bart.tokenization_bart.bytes_to_unicode
def bytes_to_unicode():
"""

View File

@ -797,22 +797,18 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
# Remove image pairs that have been early stopped from the forward pass
num_points_per_pair = num_points_per_pair[~early_stopped_pairs]
descriptors, keypoints_0, keypoint_1, mask, image_indices = tuple(
(
tensor[~early_stops]
for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
)
tensor[~early_stops]
for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
)
keypoints = (keypoints_0, keypoint_1)
if do_keypoint_pruning:
pruned_keypoints_indices, pruned_keypoints_iterations, keypoint_confidences = tuple(
(
tensor[~early_stops]
for tensor in [
pruned_keypoints_indices,
pruned_keypoints_iterations,
keypoint_confidences,
]
)
tensor[~early_stops]
for tensor in [
pruned_keypoints_indices,
pruned_keypoints_iterations,
keypoint_confidences,
]
)
# If all pairs of images are early stopped, we stop the forward pass through the transformer
# layers for all pairs of images.

View File

@ -871,22 +871,18 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
# Remove image pairs that have been early stopped from the forward pass
num_points_per_pair = num_points_per_pair[~early_stopped_pairs]
descriptors, keypoints_0, keypoint_1, mask, image_indices = tuple(
(
tensor[~early_stops]
for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
)
tensor[~early_stops]
for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
)
keypoints = (keypoints_0, keypoint_1)
if do_keypoint_pruning:
pruned_keypoints_indices, pruned_keypoints_iterations, keypoint_confidences = tuple(
(
tensor[~early_stops]
for tensor in [
pruned_keypoints_indices,
pruned_keypoints_iterations,
keypoint_confidences,
]
)
tensor[~early_stops]
for tensor in [
pruned_keypoints_indices,
pruned_keypoints_iterations,
keypoint_confidences,
]
)
# If all pairs of images are early stopped, we stop the forward pass through the transformer
# layers for all pairs of images.

View File

@ -161,13 +161,11 @@ def convert_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_path, o
vocab_size = config.text_config.vocab_size
model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
tuple(
(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]))
),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
dim=0,
)

View File

@ -175,15 +175,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape)
model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
tuple(
(
dist.sample()
for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])
)
dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])
),
dim=0,
)
model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
dim=0,
)

View File

@ -227,13 +227,11 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
num_tokens = vocab_size + 3
model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape)
model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
tuple(
(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]))
),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
dim=0,
)

View File

@ -176,13 +176,11 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
num_tokens = vocab_size + 2
model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape)
model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
tuple(
(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]))
),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
dim=0,
)

View File

@ -30,7 +30,7 @@ logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
@lru_cache()
@lru_cache
# Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
def bytes_to_unicode():
"""

View File

@ -127,7 +127,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
raise ValueError
# Finally, save our PyTorch model and tokenizer
print("Saving PyTorch model to {}".format(pytorch_dump_folder_path))
print(f"Saving PyTorch model to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)

View File

@ -130,7 +130,7 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
"""
@lru_cache()
@lru_cache
# Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
def bytes_to_unicode():
"""

View File

@ -606,7 +606,7 @@ class Mamba2Mixer(nn.Module):
# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

View File

@ -89,7 +89,7 @@ MARKUPLM_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
"""
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

View File

@ -44,7 +44,7 @@ logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

View File

@ -1247,8 +1247,8 @@ class Mask2FormerPixelDecoder(nn.Module):
nn.GroupNorm(32, feature_dim),
nn.ReLU(),
)
self.add_module("adapter_{}".format(idx + 1), lateral_conv)
self.add_module("layer_{}".format(idx + 1), output_conv)
self.add_module(f"adapter_{idx + 1}", lateral_conv)
self.add_module(f"layer_{idx + 1}", output_conv)
lateral_convs.append(lateral_conv)
output_convs.append(output_conv)

View File

@ -333,7 +333,7 @@ class MaskFormerSwinDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
# Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->MaskFormerSwin
@ -556,7 +556,7 @@ class MaskFormerSwinLayer(nn.Module):
mask_windows = window_partition(img_mask, self.window_size)
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
else:
attn_mask = None
return attn_mask

View File

@ -62,7 +62,7 @@ def load_tf_weights_in_megatron_bert(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
@ -112,7 +112,7 @@ def load_tf_weights_in_megatron_bert(model, config, tf_checkpoint_path):
array = np.transpose(array)
if pointer.shape != array.shape:
raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model

View File

@ -65,7 +65,7 @@ class MgpstrDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
@dataclass

View File

@ -89,7 +89,7 @@ class MgpstrTokenizer(PreTrainedTokenizer):
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

View File

@ -487,7 +487,7 @@ def to_channel_dimension_format(
elif target_channel_dim == ChannelDimension.LAST:
image = image.transpose((1, 2, 0))
else:
raise ValueError("Unsupported channel dimension format: {}".format(channel_dim))
raise ValueError(f"Unsupported channel dimension format: {channel_dim}")
return image

View File

@ -179,7 +179,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
assert [e for e in multilingual_predicted_entities if e.startswith("en:")][0] == "en:Japan"
# Finally, save our PyTorch model and tokenizer
print("Saving PyTorch model to {}".format(pytorch_dump_folder_path))
print(f"Saving PyTorch model to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)

View File

@ -60,7 +60,7 @@ def load_orig_config_file(orig_cfg_file):
for k, v in flat_cfg.items():
setattr(config, k, v)
except yaml.YAMLError as exc:
logger.error("Error while loading config file: {}. Error message: {}".format(orig_cfg_file, str(exc)))
logger.error(f"Error while loading config file: {orig_cfg_file}. Error message: {str(exc)}")
return config

View File

@ -32,7 +32,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
# See all MVP models at https://huggingface.co/models?filter=mvp
@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

View File

@ -1142,7 +1142,7 @@ def get_class_similarity(class_distance_type, cls_feature, class_proj):
elif class_distance_type == "dot":
class_logits = torch.bmm(cls_feature, class_proj)
else:
raise Exception("Unknown class_distance_type {}".format(class_distance_type))
raise Exception(f"Unknown class_distance_type {class_distance_type}")
return class_logits

View File

@ -1345,8 +1345,8 @@ class OneFormerPixelDecoder(nn.Module):
nn.GroupNorm(32, config.conv_dim),
nn.ReLU(),
)
self.add_module("adapter_{}".format(idx + 1), lateral_conv)
self.add_module("layer_{}".format(idx + 1), output_conv)
self.add_module(f"adapter_{idx + 1}", lateral_conv)
self.add_module(f"layer_{idx + 1}", output_conv)
lateral_convs.append(lateral_conv)
output_convs.append(output_conv)

View File

@ -346,13 +346,11 @@ def convert_paligemma2_checkpoint(
# We add an image token so we resize the model
model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
model.language_model.model.embed_tokens.weight.data[257152:] = torch.stack(
tuple(
(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0]))
),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[257152:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0])),
dim=0,
)
# convert to needed precision

View File

@ -279,11 +279,11 @@ def convert_paligemma_checkpoint(
# We add an image token so we resize the model
model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
model.language_model.model.embed_tokens.weight.data[257152:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[257152:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0])),
dim=0,
)

View File

@ -139,7 +139,7 @@ def convert_pix2struct_original_pytorch_checkpoint_to_hf(
model.save_pretrained(pytorch_dump_folder_path)
processor.save_pretrained(pytorch_dump_folder_path)
print("Model saved in {}".format(pytorch_dump_folder_path))
print(f"Model saved in {pytorch_dump_folder_path}")
if __name__ == "__main__":

View File

@ -185,7 +185,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
else:
scale_size = (int(size["height"] / crop_pct), int(size["width"] / crop_pct))
else:
raise ValueError("Invalid size for resize: {}".format(size))
raise ValueError(f"Invalid size for resize: {size}")
output_size = get_resize_output_image_size(
image, size=scale_size, default_to_square=False, input_data_format=input_data_format
@ -198,7 +198,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
elif "height" in size and "width" in size:
output_size = (size["height"], size["width"])
else:
raise ValueError("Invalid size for resize: {}".format(size))
raise ValueError(f"Invalid size for resize: {size}")
return resize(
image,

View File

@ -136,7 +136,7 @@ class PoolFormerImageProcessorFast(BaseImageProcessorFast):
else:
scale_size = (int(size.height / crop_pct), int(size.width / crop_pct))
else:
raise ValueError("Invalid size for resize: {}".format(size))
raise ValueError(f"Invalid size for resize: {size}")
new_size = get_resize_output_image_size(
image,

View File

@ -65,7 +65,7 @@ class PoolFormerDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)
def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"
class PoolFormerEmbeddings(nn.Module):
@ -142,10 +142,10 @@ class PoolFormerLayer(nn.Module):
self.use_layer_scale = config.use_layer_scale
if config.use_layer_scale:
self.layer_scale_1 = nn.Parameter(
config.layer_scale_init_value * torch.ones((num_channels)), requires_grad=True
config.layer_scale_init_value * torch.ones(num_channels), requires_grad=True
)
self.layer_scale_2 = nn.Parameter(
config.layer_scale_init_value * torch.ones((num_channels)), requires_grad=True
config.layer_scale_init_value * torch.ones(num_channels), requires_grad=True
)
def forward(self, hidden_states):

View File

@ -404,7 +404,7 @@ class Pop2PianoTokenizer(PreTrainedTokenizer):
notes = np.round(notes).astype(np.int32)
max_time_idx = notes[:, :2].max()
times = [[] for i in range((max_time_idx + 1))]
times = [[] for i in range(max_time_idx + 1)]
for onset, offset, pitch, velocity in notes:
times[onset].append([pitch, velocity])
times[offset].append([pitch, 0])

Some files were not shown because too many files have changed in this diff.