Update ruff to 0.11.2 (#36962)

* update

* update

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Yih-Dar 2025-03-25 16:00:11 +01:00 committed by GitHub
parent bc1c90a755
commit c6814b4ee8
152 changed files with 604 additions and 609 deletions


@@ -162,7 +162,7 @@ _deps = [
     "rhoknp>=1.1.0,<1.3.1",
     "rjieba",
     "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
-    "ruff==0.5.1",
+    "ruff==0.11.2",
     "sacrebleu>=1.4.12,<2.0.0",
     "sacremoses",
     "safetensors>=0.4.3",


@@ -167,9 +167,9 @@ class Tool:
             )
         for input_name, input_content in self.inputs.items():
             assert isinstance(input_content, dict), f"Input '{input_name}' should be a dictionary."
-            assert (
-                "type" in input_content and "description" in input_content
-            ), f"Input '{input_name}' should have keys 'type' and 'description', has only {list(input_content.keys())}."
+            assert "type" in input_content and "description" in input_content, (
+                f"Input '{input_name}' should have keys 'type' and 'description', has only {list(input_content.keys())}."
+            )
             if input_content["type"] not in authorized_types:
                 raise Exception(
                     f"Input '{input_name}': type '{input_content['type']}' is not an authorized value, should be one of {authorized_types}."


@@ -313,11 +313,9 @@ def add_fast_image_processor_to_doc(fast_image_processor_name: str, model_name:
         raise ValueError(f"No doc files found for {model_name}")

     base_doc_string = (
-        f"## {fast_image_processor_name[:-4]}\n\n" f"[[autodoc]] {fast_image_processor_name[:-4]}\n" " - preprocess"
-    )
-    fast_doc_string = (
-        f"## {fast_image_processor_name}\n\n" f"[[autodoc]] {fast_image_processor_name}\n" " - preprocess"
+        f"## {fast_image_processor_name[:-4]}\n\n[[autodoc]] {fast_image_processor_name[:-4]}\n - preprocess"
     )
+    fast_doc_string = f"## {fast_image_processor_name}\n\n[[autodoc]] {fast_image_processor_name}\n - preprocess"

     for doc_file in doc_files:
         with open(doc_file, "r", encoding="utf-8") as f:
@@ -385,7 +383,7 @@ def add_fast_image_processor_to_tests(fast_image_processor_name: str, model_name
     # add the fast image processor to the imports
     base_import_string = f" from transformers import {fast_image_processor_name[:-4]}"
     fast_import_string = (
-        " if is_torchvision_available():\n" f" from transformers import {fast_image_processor_name}"
+        f" if is_torchvision_available():\n from transformers import {fast_image_processor_name}"
     )
     if fast_import_string not in updated_content:
         updated_content = updated_content.replace(base_import_string, base_import_string + "\n\n" + fast_import_string)
@@ -546,17 +544,17 @@ def add_fast_image_processor_file(
         " # For an example of a fast image processor requiring more complex augmentations, see `LlavaNextImageProcessorFast`.\n\n"
         " # Default values should be checked against the slow image processor\n"
         " # None values left after checking can be removed\n"
-        f' resample = {default_args_dict.get("resample")}\n'
-        f' image_mean = {default_args_dict.get("image_mean")}\n'
-        f' image_std = {default_args_dict.get("image_std")}\n'
-        f' size = {default_args_dict.get("size")}\n'
-        f' default_to_square = {default_args_dict.get("default_to_square")}\n'
-        f' crop_size = {default_args_dict.get("crop_size")}\n'
-        f' do_resize = {default_args_dict.get("do_resize")}\n'
-        f' do_center_crop = {default_args_dict.get("do_center_crop")}\n'
-        f' do_rescale = {default_args_dict.get("do_rescale")}\n'
-        f' do_normalize = {default_args_dict.get("do_normalize")}\n'
-        f' do_convert_rgb = {default_args_dict.get("do_convert_rgb")}\n\n\n'
+        f" resample = {default_args_dict.get('resample')}\n"
+        f" image_mean = {default_args_dict.get('image_mean')}\n"
+        f" image_std = {default_args_dict.get('image_std')}\n"
+        f" size = {default_args_dict.get('size')}\n"
+        f" default_to_square = {default_args_dict.get('default_to_square')}\n"
+        f" crop_size = {default_args_dict.get('crop_size')}\n"
+        f" do_resize = {default_args_dict.get('do_resize')}\n"
+        f" do_center_crop = {default_args_dict.get('do_center_crop')}\n"
+        f" do_rescale = {default_args_dict.get('do_rescale')}\n"
+        f" do_normalize = {default_args_dict.get('do_normalize')}\n"
+        f" do_convert_rgb = {default_args_dict.get('do_convert_rgb')}\n\n\n"
         f'__all__ = ["{fast_image_processor_name}"]\n'
     )
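
The quote flips in this hunk are consistent with the updated formatter also normalizing quotes inside f-strings: the outer literal switches to double quotes and the inner literals to single quotes, leaving the rendered text unchanged. A small check under that assumption, with a hypothetical defaults dict standing in for default_args_dict:

defaults = {"resample": 2}  # hypothetical stand-in for default_args_dict

old = f'resample = {defaults.get("resample")}\n'
new = f"resample = {defaults.get('resample')}\n"
assert old == new  # only the quoting changes, not the generated text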


@@ -189,7 +189,7 @@ def infer_shapes(nlp: Pipeline, framework: str) -> tuple[list[str], list[str], d
                     raise ValueError(f"Unable to infer tensor axes ({len(tensor.shape)})")
             else:
                 seq_axes = [dim for dim, shape in enumerate(tensor.shape) if shape == seq_len]
-                axes.update({dim: "sequence" for dim in seq_axes})
+                axes.update(dict.fromkeys(seq_axes, "sequence"))

         print(f"Found {'input' if is_input else 'output'} {name} with shape: {axes}")
         return axes


@@ -226,7 +226,7 @@ def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_probability_
     no_answer_qids = [qas_id for qas_id, has_answer in qas_id_to_has_answer.items() if not has_answer]

     if no_answer_probs is None:
-        no_answer_probs = {k: 0.0 for k in preds}
+        no_answer_probs = dict.fromkeys(preds, 0.0)

     exact, f1 = get_raw_scores(examples, preds)


@@ -101,7 +101,7 @@ if is_tf_available():
         return tf.data.Dataset.from_generator(
             gen,
-            ({k: tf.int32 for k in input_names}, label_type),
+            (dict.fromkeys(input_names, tf.int32), label_type),
             ({k: tf.TensorShape([None]) for k in input_names}, tf.TensorShape([])),
         )


@@ -68,7 +68,7 @@ deps = {
     "rhoknp": "rhoknp>=1.1.0,<1.3.1",
     "rjieba": "rjieba",
     "rouge-score": "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
-    "ruff": "ruff==0.5.1",
+    "ruff": "ruff==0.11.2",
     "sacrebleu": "sacrebleu>=1.4.12,<2.0.0",
     "sacremoses": "sacremoses",
     "safetensors": "safetensors>=0.4.3",


@@ -2749,9 +2749,7 @@ class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor):
                 ngram keys (batch_size, num_ngrams, depth).
         """
         if len(ngrams.shape) != 3:
-            raise ValueError(
-                "Ngrams should be of shape (batch_size, num_ngrams, ngram_len), but" f" is {ngrams.shape}"
-            )
+            raise ValueError(f"Ngrams should be of shape (batch_size, num_ngrams, ngram_len), but is {ngrams.shape}")
         if ngrams.shape[2] != self.ngram_len:
             raise ValueError(
                 "Ngrams should be of shape (batch_size, num_ngrams, ngram_len),"
@@ -2836,7 +2834,7 @@ class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor):
     def _check_input_ids_shape(self, input_ids: torch.LongTensor):
         """Checks the shape of input ids."""
         if len(input_ids.shape) != 2:
-            raise ValueError("Input ids should be of shape (batch_size, input_len), but is" f" {input_ids.shape}")
+            raise ValueError(f"Input ids should be of shape (batch_size, input_len), but is {input_ids.shape}")

     def compute_g_values(self, input_ids: torch.LongTensor) -> torch.LongTensor:
         """


@@ -1678,7 +1678,7 @@ class GenerationMixin:
         if execution_device_map is None:
             return None
         elif len(execution_device_map) == 1 and "" in execution_device_map:
-            return {idx: execution_device_map[""] for idx in range(num_hidden_layers)}
+            return dict.fromkeys(range(num_hidden_layers), execution_device_map[""])
         layer_device_map = {}
         for layer in execution_device_map:
             for idx in range(num_hidden_layers):


@@ -106,11 +106,11 @@ def prepare_for_hqq_linear(model, quantization_config=None, modules_to_not_conve
     if any(key in linear_tags for key in quant_config.keys()):
         # If the user doesn't specify a key from get_linear_tags, the layer is not quantized via (key, None)
-        patch_params = {key: None for key in linear_tags}
+        patch_params = dict.fromkeys(linear_tags)
         patch_params.update(quant_config)
     else:
         # Same quant_config for all layers
-        patch_params = {k: quant_config for k in linear_tags}
+        patch_params = dict.fromkeys(linear_tags, quant_config)

     model, has_been_replaced = _prepare_for_hqq_linear(
         model, patch_params=patch_params, has_been_replaced=has_been_replaced
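
dict.fromkeys without an explicit value fills every key with None, so the first replacement above is behavior-preserving; a short illustration with hypothetical tag names:

linear_tags = ["q_proj", "k_proj"]  # hypothetical layer tags

assert dict.fromkeys(linear_tags) == {"q_proj": None, "k_proj": None}
assert dict.fromkeys(linear_tags, {"bits": 4}) == {"q_proj": {"bits": 4}, "k_proj": {"bits": 4}}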


@@ -21,9 +21,9 @@ def tpu_spmd_dataloader(dataloader: DataLoader):
     if is_torch_xla_available():
         import torch_xla.distributed.parallel_loader as pl

-        assert isinstance(
-            dataloader, pl.MpDeviceLoader
-        ), "The dataloader must be a `torch_xla.distributed.parallel_loader.MpDeviceLoader`."
+        assert isinstance(dataloader, pl.MpDeviceLoader), (
+            "The dataloader must be a `torch_xla.distributed.parallel_loader.MpDeviceLoader`."
+        )

         # This is to support PyTorch/XLA FSDP via SPMD.
         # Here we shard the input data's 0th dim across the fsdp axis.


@@ -2509,9 +2509,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
             total_decoder_name="",
             total_encoder_name="",
         ):
-            assert isinstance(decoder_pointer, nn.Module) and isinstance(
-                encoder_pointer, nn.Module
-            ), f"{decoder_pointer} and {encoder_pointer} have to be of type nn.Module"
+            assert isinstance(decoder_pointer, nn.Module) and isinstance(encoder_pointer, nn.Module), (
+                f"{decoder_pointer} and {encoder_pointer} have to be of type nn.Module"
+            )
             if hasattr(decoder_pointer, "weight"):
                 assert hasattr(encoder_pointer, "weight")
                 encoder_pointer.weight = decoder_pointer.weight
@@ -2525,9 +2525,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
             encoder_modules = encoder_pointer._modules
             decoder_modules = decoder_pointer._modules
             if len(decoder_modules) > 0:
-                assert (
-                    len(encoder_modules) > 0
-                ), f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}"
+                assert len(encoder_modules) > 0, (
+                    f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}"
+                )

                 all_encoder_weights = {module_name + "/" + sub_name for sub_name in encoder_modules.keys()}
                 encoder_layer_pos = 0
@@ -3571,7 +3571,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
                     f"Please upgrade accelerate with `pip install -U accelerate`"
                 )
             # init state_dict for this shard
-            shard_state_dict = {name: "" for name in shard}
+            shard_state_dict = dict.fromkeys(shard, "")
             for module_name in shard:
                 # skip to collect this weight again
                 if shard_state_dict.get(module_name) != "":
@@ -4814,7 +4814,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
     param_device_map = expand_device_map(device_map, checkpoint_keys)
     str_dtype = str(dtype).replace("torch.", "") if dtype is not None else "float32"
     if sharded_metadata is None:
-        weight_map = {p: checkpoint_files[0] for p in checkpoint_keys}
+        weight_map = dict.fromkeys(checkpoint_keys, checkpoint_files[0])
     else:
         folder = os.path.sep.join(checkpoint_files[0].split(os.path.sep)[:-1])
         # Fix the weight map keys according to the key mapping
@@ -5446,9 +5446,9 @@ class PoolerEndLogits(nn.Module):
         Returns:
             `torch.FloatTensor`: The end logits for SQuAD.
         """
-        assert (
-            start_states is not None or start_positions is not None
-        ), "One of start_states, start_positions should be not None"
+        assert start_states is not None or start_positions is not None, (
+            "One of start_states, start_positions should be not None"
+        )
         if start_positions is not None:
             slen, hsz = hidden_states.shape[-2:]
             start_positions = start_positions[:, None, None].expand(-1, -1, hsz)  # shape (bsz, 1, hsz)
@@ -5514,9 +5514,9 @@ class PoolerAnswerClass(nn.Module):
         """
         # No dependency on end_feature so that we can obtain one single `cls_logits` for each sample.
         hsz = hidden_states.shape[-1]
-        assert (
-            start_states is not None or start_positions is not None
-        ), "One of start_states, start_positions should be not None"
+        assert start_states is not None or start_positions is not None, (
+            "One of start_states, start_positions should be not None"
+        )
         if start_positions is not None:
             start_positions = start_positions[:, None, None].expand(-1, -1, hsz)  # shape (bsz, 1, hsz)
             start_states = hidden_states.gather(-2, start_positions).squeeze(-2)  # shape (bsz, hsz)


@@ -1058,7 +1058,7 @@ class AltCLIPVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]


@@ -329,7 +329,7 @@ class BridgeTowerVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]


@@ -234,7 +234,7 @@ class ChineseCLIPVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]


@@ -242,7 +242,7 @@ class CLIPVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]


@@ -209,7 +209,7 @@ class CLIPSegVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         patch_embeds = self.patch_embedding(pixel_values)  # shape = [*, width, grid, grid]
         patch_embeds = patch_embeds.flatten(2).transpose(1, 2)


@@ -144,7 +144,7 @@ class ClvpEncoderConfig(PretrainedConfig):
         # this is to make sure that we can load only text or speech configs from the nested ClvpConfig.
         if config_type not in cls.base_config_key:
             raise ValueError(
-                f"We can only load either 'text_config' or 'speech_config' but you are trying to load" f"{config_type}"
+                f"We can only load either 'text_config' or 'speech_config' but you are trying to load{config_type}"
             )

         # get the text config dict if we are loading from ClvpConfig


@@ -127,9 +127,9 @@ def convert_data2vec_checkpoint_to_pytorch(
         # self-attention output
         self_output: BertSelfOutput = layer.attention.output
-        assert (
-            self_output.dense.weight.shape == data2vec_layer.self_attn.out_proj.weight.shape
-        ), f"Shape for self_output.dense.weight should be {data2vec_layer.self_attn.out_proj.weight.shape}"
+        assert self_output.dense.weight.shape == data2vec_layer.self_attn.out_proj.weight.shape, (
+            f"Shape for self_output.dense.weight should be {data2vec_layer.self_attn.out_proj.weight.shape}"
+        )
         self_output.dense.weight = data2vec_layer.self_attn.out_proj.weight
         self_output.dense.bias = data2vec_layer.self_attn.out_proj.bias
         self_output.LayerNorm.weight = data2vec_layer.self_attn_layer_norm.weight
@@ -137,17 +137,17 @@ def convert_data2vec_checkpoint_to_pytorch(
         # intermediate
         intermediate: BertIntermediate = layer.intermediate
-        assert (
-            intermediate.dense.weight.shape == data2vec_layer.fc1.weight.shape
-        ), f"Shape for intermediate.dense.weight should be {data2vec_layer.fc1.weight.shape}"
+        assert intermediate.dense.weight.shape == data2vec_layer.fc1.weight.shape, (
+            f"Shape for intermediate.dense.weight should be {data2vec_layer.fc1.weight.shape}"
+        )
         intermediate.dense.weight = data2vec_layer.fc1.weight
         intermediate.dense.bias = data2vec_layer.fc1.bias

         # output
         bert_output: BertOutput = layer.output
-        assert (
-            bert_output.dense.weight.shape == data2vec_layer.fc2.weight.shape
-        ), f"Shape for bert_output.dense.weight should be {data2vec_layer.fc2.weight.shape}"
+        assert bert_output.dense.weight.shape == data2vec_layer.fc2.weight.shape, (
+            f"Shape for bert_output.dense.weight should be {data2vec_layer.fc2.weight.shape}"
+        )
         bert_output.dense.weight = data2vec_layer.fc2.weight
         bert_output.dense.bias = data2vec_layer.fc2.bias
         bert_output.LayerNorm.weight = data2vec_layer.final_layer_norm.weight


@@ -180,9 +180,9 @@ def convert_bort_checkpoint_to_pytorch(bort_checkpoint_path: str, pytorch_dump_f
         gluon_param = to_torch(params[gluon_param])
         shape_gluon = gluon_param.shape

-        assert (
-            shape_hf == shape_gluon
-        ), f"The gluon parameter {gluon_param} has shape {shape_gluon}, but expects shape {shape_hf} for Transformers"
+        assert shape_hf == shape_gluon, (
+            f"The gluon parameter {gluon_param} has shape {shape_gluon}, but expects shape {shape_hf} for Transformers"
+        )

         return gluon_param


@@ -427,7 +427,7 @@ class SubWordJapaneseTokenizer:
         )
         keisen = "─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋╌╍╎╏═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿"
         blocks = "▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟"
-        self.content_trans1 = str.maketrans({k: "<BLOCK>" for k in keisen + blocks})
+        self.content_trans1 = str.maketrans(dict.fromkeys(keisen + blocks, "<BLOCK>"))

     def __len__(self):
         return len(self.ids_to_tokens)


@@ -154,7 +154,7 @@ class OpenLlamaConfig(PretrainedConfig):
         if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
             raise ValueError(
-                "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}"
+                f"`rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {self.rope_scaling}"
             )
         rope_scaling_type = self.rope_scaling.get("type", None)
         rope_scaling_factor = self.rope_scaling.get("factor", None)


@@ -139,9 +139,9 @@ def load_tf_weights_in_realm(model, config, tf_checkpoint_path):
             elif m_name == "kernel":
                 array = np.transpose(array)
             try:
-                assert (
-                    pointer.shape == array.shape
-                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+                assert pointer.shape == array.shape, (
+                    f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+                )
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise


@@ -579,7 +579,7 @@ class Speech2Text2Decoder(Speech2Text2PreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
                 logger.warning_once(
-                    "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache =" " False`..."
+                    "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`..."
                 )
                 use_cache = False


@@ -1095,9 +1095,9 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc
             batch_size, sequence_length = shape_list(input_ids)[:2]
         else:
             batch_size, sequence_length = shape_list(inputs_embeds)[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        assert self.config.pad_token_id is not None or batch_size == 1, (
+            "Cannot handle batch sizes > 1 if no padding token is defined."
+        )

         if not tf.is_tensor(sequence_lengths):
             in_logits = logits[0:batch_size, sequence_lengths]


@@ -155,9 +155,9 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
                     p_i.data = torch.from_numpy(arr_i)
         else:
             try:
-                assert (
-                    pointer.shape == array.shape
-                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+                assert pointer.shape == array.shape, (
+                    f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+                )
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise
@@ -1238,9 +1238,9 @@ class TransfoXLForSequenceClassification(TransfoXLPreTrainedModel):
         else:
             batch_size, sequence_length = inputs_embeds.shape[:2]

-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        assert self.config.pad_token_id is not None or batch_size == 1, (
+            "Cannot handle batch sizes > 1 if no padding token is defined."
+        )
         if self.config.pad_token_id is None:
             sequence_lengths = -1
         else:


@@ -588,9 +588,9 @@ class XLMProphetNetPositionalEmbeddings(nn.Embedding):
         super().__init__(config.max_position_embeddings, config.hidden_size, config.pad_token_id)

     def forward(self, inputs_shape, device, attention_mask=None, past_key_values=None, position_ids=None):
-        assert (position_ids is None) or (
-            self.padding_idx is None
-        ), "If position_ids is pre-computed then padding_idx should not be set."
+        assert (position_ids is None) or (self.padding_idx is None), (
+            "If position_ids is pre-computed then padding_idx should not be set."
+        )

         if position_ids is None:
             if past_key_values is not None:
@@ -784,9 +784,9 @@ class XLMProphetNetNgramSelfAttention(nn.Module):
         self.head_dim = config.hidden_size // self.num_attn_heads
         self.ngram = config.ngram

-        assert (
-            self.head_dim * self.num_attn_heads == config.hidden_size
-        ), "config.hidden_size must be divisible by num_attn_heads"
+        assert self.head_dim * self.num_attn_heads == config.hidden_size, (
+            "config.hidden_size must be divisible by num_attn_heads"
+        )
         # key, value, query projection
         self.key_proj = nn.Linear(config.hidden_size, config.hidden_size)
         self.value_proj = nn.Linear(config.hidden_size, config.hidden_size)
@@ -1041,9 +1041,9 @@ class XLMProphetNetNgramSelfAttention(nn.Module):
             if predict_relative_position_buckets is None:
                 key_sequence_length = attn_weights.shape[-1]
-                assert (
-                    position_ids[0][0] == key_sequence_length - 1
-                ), "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)"
+                assert position_ids[0][0] == key_sequence_length - 1, (
+                    "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)"
+                )
                 relative_positions = (
                     torch.arange(0, key_sequence_length)
                     .unsqueeze(0)
@@ -1313,9 +1313,9 @@ class XLMProphetNetEncoder(XLMProphetNetPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            assert head_mask.size()[0] == (len(self.layers)), (
+                f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_hidden_states = encoder_hidden_states + (hidden_states,)
@@ -1488,9 +1488,9 @@ class XLMProphetNetDecoder(XLMProphetNetPreTrainedModel):
         # prepare attention mask
         if past_key_values is not None:
-            assert (
-                hidden_states.size(1) == 1
-            ), "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1"
+            assert hidden_states.size(1) == 1, (
+                "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1"
+            )

             ngram_hidden_states = [
                 (ngram_embeddings[ngram - 1] + predicting_stream_pos_embed).repeat(batch_size, 1, 1)


@@ -114,7 +114,7 @@ class DepthProConfig(PretrainedConfig):
         # scaled_images_ratios is sorted
         if scaled_images_ratios != sorted(scaled_images_ratios):
             raise ValueError(
-                f"Values in scaled_images_ratios={scaled_images_ratios} " "should be sorted from low to high"
+                f"Values in scaled_images_ratios={scaled_images_ratios} should be sorted from low to high"
             )

         # scaled_images_ratios, scaled_images_overlap_ratios, scaled_images_feature_dims should be consistent


@@ -275,9 +275,9 @@ class FlaxTransformerBlock(nn.Module):
     dtype: jnp.dtype = jnp.float32  # the dtype of the computation

     def setup(self):
-        assert (
-            self.config.dim % self.config.n_heads == 0
-        ), f"Hidden size {self.config.dim} not dividable by number of heads {self.config.n_heads}"
+        assert self.config.dim % self.config.n_heads == 0, (
+            f"Hidden size {self.config.dim} not dividable by number of heads {self.config.n_heads}"
+        )

         self.attention = FlaxMultiHeadSelfAttention(self.config, dtype=self.dtype)
         self.sa_layer_norm = nn.LayerNorm(epsilon=1e-12, dtype=self.dtype)


@@ -269,9 +269,9 @@ class TFTransformerBlock(keras.layers.Layer):
         self.activation = config.activation
         self.output_attentions = config.output_attentions

-        assert (
-            config.dim % config.n_heads == 0
-        ), f"Hidden size {config.dim} not dividable by number of heads {config.n_heads}"
+        assert config.dim % config.n_heads == 0, (
+            f"Hidden size {config.dim} not dividable by number of heads {config.n_heads}"
+        )
         self.attention = TFMultiHeadSelfAttention(config, name="attention")
         self.sa_layer_norm = keras.layers.LayerNormalization(epsilon=1e-12, name="sa_layer_norm")


@@ -137,7 +137,7 @@ if __name__ == "__main__":
     dest_dir = f"converted-{src_file.name}" if args.dest is None else args.dest
     dest_dir = Path(dest_dir)
     assert src_file.exists()
-    assert (
-        args.type is not None
-    ), "Please specify the component type of the DPR model to convert: 'ctx_encoder', 'question_encoder' or 'reader'."
+    assert args.type is not None, (
+        "Please specify the component type of the DPR model to convert: 'ctx_encoder', 'question_encoder' or 'reader'."
+    )
     convert(args.type, src_file, dest_dir)


@@ -170,9 +170,9 @@ class CustomDPRReaderTokenizerMixin:
         texts = texts if not isinstance(texts, str) else [texts]
         n_passages = len(titles)
         questions = questions if not isinstance(questions, str) else [questions] * n_passages
-        assert len(titles) == len(
-            texts
-        ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
+        assert len(titles) == len(texts), (
+            f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
+        )
         encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
         encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
         encoded_inputs = {


@@ -617,8 +617,7 @@ class EncodecModel(EncodecPreTrainedModel):
             bandwidth = self.config.target_bandwidths[0]
         if bandwidth not in self.config.target_bandwidths:
             raise ValueError(
-                f"This model doesn't support the bandwidth {bandwidth}. "
-                f"Select one of {self.config.target_bandwidths}."
+                f"This model doesn't support the bandwidth {bandwidth}. Select one of {self.config.target_bandwidths}."
             )

         _, channels, input_length = input_values.shape
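
Several error messages, as in the hunk above, were previously built from implicitly concatenated string literals; the updated formatter appears to join such pieces whenever the merged literal still fits the line-length limit. An illustrative sketch with made-up values:

bandwidth, supported = 6.0, [1.5, 3.0, 6.0]  # made-up values

old_msg = (
    f"This model doesn't support the bandwidth {bandwidth}. "
    f"Select one of {supported}."
)
new_msg = f"This model doesn't support the bandwidth {bandwidth}. Select one of {supported}."
assert old_msg == new_msg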


@@ -399,13 +399,11 @@ def map_structure_with_atom_order(in_list: list, first_call: bool = True) -> lis

 @functools.lru_cache(maxsize=None)
-def load_stereo_chemical_props() -> (
-    Tuple[
-        Mapping[str, List[Bond]],
-        Mapping[str, List[Bond]],
-        Mapping[str, List[BondAngle]],
-    ]
-):
+def load_stereo_chemical_props() -> Tuple[
+    Mapping[str, List[Bond]],
+    Mapping[str, List[Bond]],
+    Mapping[str, List[BondAngle]],
+]:
     """Load stereo_chemical_props.txt into a nice structure.

     Load literature values for bond lengths and bond angles and translate bond angles into the length of the opposite
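
Here the formatter drops the redundant parentheses it previously placed around a long return annotation and splits the subscripted type directly. A minimal sketch of the two layouts, using hypothetical function names:

from typing import List, Mapping, Tuple


# Old layout: the whole annotation wrapped in an extra pair of parentheses.
def old_style() -> (
    Tuple[
        Mapping[str, List[int]],
        Mapping[str, List[int]],
    ]
):
    return {}, {}


# New layout: the subscript itself is split; no extra parentheses are needed.
def new_style() -> Tuple[
    Mapping[str, List[int]],
    Mapping[str, List[int]],
]:
    return {}, {}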


@@ -539,9 +539,9 @@ class FSMTEncoder(nn.Module):
         all_attentions = () if output_attentions else None
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            assert head_mask.size()[0] == (len(self.layers)), (
+                f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 x = x.transpose(0, 1)  # T x B x C -> B x T x C
@@ -960,9 +960,9 @@ class Attention(nn.Module):
         attn_weights = nn.functional.softmax(attn_weights, dim=-1)

         if layer_head_mask is not None:
-            assert layer_head_mask.size() == (
-                self.num_heads,
-            ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            assert layer_head_mask.size() == (self.num_heads,), (
+                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            )
             attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)


@@ -113,9 +113,9 @@ class FunnelConfig(PretrainedConfig):
         self.vocab_size = vocab_size
         self.block_sizes = block_sizes
         self.block_repeats = [1] * len(block_sizes) if block_repeats is None else block_repeats
-        assert len(block_sizes) == len(
-            self.block_repeats
-        ), "`block_sizes` and `block_repeats` should have the same length."
+        assert len(block_sizes) == len(self.block_repeats), (
+            "`block_sizes` and `block_repeats` should have the same length."
+        )
         self.num_decoder_layers = num_decoder_layers
         self.d_model = d_model
         self.n_head = n_head


@@ -195,7 +195,7 @@ class FuyuConfig(PretrainedConfig):
         if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
             raise ValueError(
-                "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}"
+                f"`rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {self.rope_scaling}"
             )
         rope_scaling_type = self.rope_scaling.get("type", None)
         rope_scaling_factor = self.rope_scaling.get("factor", None)


@@ -683,7 +683,7 @@ class GitVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
            )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]


@@ -250,7 +250,7 @@ class SubWordJapaneseTokenizer:
         )
         keisen = "─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋╌╍╎╏═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿"
         blocks = "▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟"
-        self.content_trans1 = str.maketrans({k: "<BLOCK>" for k in keisen + blocks})
+        self.content_trans1 = str.maketrans(dict.fromkeys(keisen + blocks, "<BLOCK>"))

     def __len__(self):
         return len(self.ids_to_tokens)


@@ -171,9 +171,9 @@ class QuantAct(nn.Module):
             x_min = x_act.data.min()
             x_max = x_act.data.max()

-            assert (
-                x_max.isnan().sum() == 0 and x_min.isnan().sum() == 0
-            ), "NaN detected when computing min/max of the activation"
+            assert x_max.isnan().sum() == 0 and x_min.isnan().sum() == 0, (
+                "NaN detected when computing min/max of the activation"
+            )

             # Initialization
             if self.x_min.min() > -1.1e-5 and self.x_max.max() < 1.1e-5:


@@ -451,7 +451,7 @@ class Kosmos2VisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]


@@ -101,8 +101,7 @@ class LayoutXLMProcessor(ProcessorMixin):
         # verify input
         if self.image_processor.apply_ocr and (boxes is not None):
             raise ValueError(
-                "You cannot provide bounding boxes "
-                "if you initialized the image processor with apply_ocr set to True."
+                "You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True."
             )

         if self.image_processor.apply_ocr and (word_labels is not None):


@@ -130,12 +130,12 @@ class LEDEncoderSelfAttention(nn.Module):
         self.layer_id = layer_id
         attention_window = config.attention_window[self.layer_id]

-        assert (
-            attention_window % 2 == 0
-        ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
-        assert (
-            attention_window > 0
-        ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        assert attention_window % 2 == 0, (
+            f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
+        )
+        assert attention_window > 0, (
+            f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        )

         self.one_sided_attn_window_size = attention_window // 2
@@ -169,9 +169,9 @@ class LEDEncoderSelfAttention(nn.Module):
         value_vectors = self.value(hidden_states)

         seq_len, batch_size, embed_dim = hidden_states.size()
-        assert (
-            embed_dim == self.embed_dim
-        ), f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}"
+        assert embed_dim == self.embed_dim, (
+            f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}"
+        )

         # normalize query
         query_vectors /= math.sqrt(self.head_dim)
@@ -239,9 +239,9 @@ class LEDEncoderSelfAttention(nn.Module):
         )  # use fp32 for numerical stability

         if layer_head_mask is not None:
-            assert layer_head_mask.size() == (
-                self.num_heads,
-            ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            assert layer_head_mask.size() == (self.num_heads,), (
+                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            )
             attn_probs = layer_head_mask.view(1, 1, -1, 1) * attn_probs

         # softmax sometimes inserts NaN if all positions are masked, replace them with 0
@@ -433,9 +433,9 @@ class LEDEncoderSelfAttention(nn.Module):
             overlap of size window_overlap
         """
         batch_size, seq_len, num_heads, head_dim = query.size()
-        assert (
-            seq_len % (window_overlap * 2) == 0
-        ), f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}"
+        assert seq_len % (window_overlap * 2) == 0, (
+            f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}"
+        )
         assert query.size() == key.size()

         chunks_count = torch.div(seq_len, window_overlap, rounding_mode="trunc") - 1
@@ -706,9 +706,9 @@ class LEDEncoderSelfAttention(nn.Module):
         # apply layer head masking
         if layer_head_mask is not None:
-            assert layer_head_mask.size() == (
-                self.num_heads,
-            ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            assert layer_head_mask.size() == (self.num_heads,), (
+                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            )
             global_attn_probs_float = layer_head_mask.view(1, -1, 1, 1) * global_attn_probs_float.view(
                 batch_size, self.num_heads, max_num_global_attn_indices, seq_len
             )


@@ -182,12 +182,12 @@ class TFLEDEncoderSelfAttention(keras.layers.Layer):
         self.layer_id = layer_id
         attention_window = config.attention_window[self.layer_id]

-        assert (
-            attention_window % 2 == 0
-        ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
-        assert (
-            attention_window > 0
-        ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        assert attention_window % 2 == 0, (
+            f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
+        )
+        assert attention_window > 0, (
+            f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        )

         self.one_sided_attn_window_size = attention_window // 2


@@ -510,12 +510,12 @@ class LongformerSelfAttention(nn.Module):
         self.layer_id = layer_id
         attention_window = config.attention_window[self.layer_id]

-        assert (
-            attention_window % 2 == 0
-        ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
-        assert (
-            attention_window > 0
-        ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        assert attention_window % 2 == 0, (
+            f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
+        )
+        assert attention_window > 0, (
+            f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        )

         self.one_sided_attn_window_size = attention_window // 2
@@ -549,9 +549,9 @@ class LongformerSelfAttention(nn.Module):
         value_vectors = self.value(hidden_states)

         seq_len, batch_size, embed_dim = hidden_states.size()
-        assert (
-            embed_dim == self.embed_dim
-        ), f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}"
+        assert embed_dim == self.embed_dim, (
+            f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}"
+        )

         # normalize query
         query_vectors /= math.sqrt(self.head_dim)
@@ -619,9 +619,9 @@ class LongformerSelfAttention(nn.Module):
         )  # use fp32 for numerical stability

         if layer_head_mask is not None:
-            assert layer_head_mask.size() == (
-                self.num_heads,
-            ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            assert layer_head_mask.size() == (self.num_heads,), (
+                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            )
             attn_probs = layer_head_mask.view(1, 1, -1, 1) * attn_probs

         # softmax sometimes inserts NaN if all positions are masked, replace them with 0
@@ -813,9 +813,9 @@ class LongformerSelfAttention(nn.Module):
             overlap of size window_overlap
         """
         batch_size, seq_len, num_heads, head_dim = query.size()
-        assert (
-            seq_len % (window_overlap * 2) == 0
-        ), f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}"
+        assert seq_len % (window_overlap * 2) == 0, (
+            f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}"
+        )
         assert query.size() == key.size()

         chunks_count = torch.div(seq_len, window_overlap, rounding_mode="trunc") - 1
@@ -1086,9 +1086,9 @@ class LongformerSelfAttention(nn.Module):
         # apply layer head masking
         if layer_head_mask is not None:
-            assert layer_head_mask.size() == (
-                self.num_heads,
-            ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            assert layer_head_mask.size() == (self.num_heads,), (
+                f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}"
+            )
             global_attn_probs_float = layer_head_mask.view(1, -1, 1, 1) * global_attn_probs_float.view(
                 batch_size, self.num_heads, max_num_global_attn_indices, seq_len
             )
@@ -1287,9 +1287,9 @@ class LongformerEncoder(nn.Module):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layer)
-            ), f"The head_mask should be specified for {len(self.layer)} layers, but it is for {head_mask.size()[0]}."
+            assert head_mask.size()[0] == (len(self.layer)), (
+                f"The head_mask should be specified for {len(self.layer)} layers, but it is for {head_mask.size()[0]}."
+            )
         for idx, layer_module in enumerate(self.layer):
             if output_hidden_states:
                 all_hidden_states = all_hidden_states + (hidden_states,)
@@ -1590,8 +1590,7 @@ class LongformerModel(LongformerPreTrainedModel):
         # this path should be recorded in the ONNX export, it is fine with padding_len == 0 as well
         if padding_len > 0:
             logger.warning_once(
-                f"Input ids are automatically padded to be a multiple of "
-                f"`config.attention_window`: {attention_window}"
+                f"Input ids are automatically padded to be a multiple of `config.attention_window`: {attention_window}"
             )
             if input_ids is not None:
                 input_ids = nn.functional.pad(input_ids, (0, padding_len), value=pad_token_id)


@@ -746,12 +746,12 @@ class TFLongformerSelfAttention(keras.layers.Layer):
         self.layer_id = layer_id
         attention_window = config.attention_window[self.layer_id]

-        assert (
-            attention_window % 2 == 0
-        ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
-        assert (
-            attention_window > 0
-        ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        assert attention_window % 2 == 0, (
+            f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}"
+        )
+        assert attention_window > 0, (
+            f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}"
+        )

         self.one_sided_attn_window_size = attention_window // 2


@@ -1294,7 +1294,7 @@ class M2M100Decoder(M2M100PreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
                 logger.warning_once(
-                    "`use_cache=True` is incompatible with gradient checkpointing. Setting" " `use_cache=False`..."
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False

View File

@ -228,7 +228,7 @@ class TatoebaConverter:
# combine with Tatoeba markdown # combine with Tatoeba markdown
readme_url = f"{TATOEBA_MODELS_URL}/{model_dict['_name']}/README.md" readme_url = f"{TATOEBA_MODELS_URL}/{model_dict['_name']}/README.md"
extra_markdown = f""" extra_markdown = f"""
### {model_dict['_name']} ### {model_dict["_name"]}
* source language name: {self.tag2name[a3_src]} * source language name: {self.tag2name[a3_src]}
* target language name: {self.tag2name[a3_tgt]} * target language name: {self.tag2name[a3_tgt]}
@ -237,12 +237,12 @@ class TatoebaConverter:
content = ( content = (
f""" f"""
* model: {model_dict['modeltype']} * model: {model_dict["modeltype"]}
* source language code{src_multilingual*'s'}: {', '.join(a2_src_tags)} * source language code{src_multilingual * "s"}: {", ".join(a2_src_tags)}
* target language code{tgt_multilingual*'s'}: {', '.join(a2_tgt_tags)} * target language code{tgt_multilingual * "s"}: {", ".join(a2_tgt_tags)}
* dataset: opus {backtranslated_data} * dataset: opus {backtranslated_data}
* release date: {model_dict['release-date']} * release date: {model_dict["release-date"]}
* pre-processing: {model_dict['pre-processing']} * pre-processing: {model_dict["pre-processing"]}
""" """
+ multilingual_data + multilingual_data
+ tuned + tuned
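
The Tatoeba converter hunks above show a further normalization inside f-string replacement fields: nested quotes become double quotes and operators gain surrounding spaces ({src_multilingual*'s'} turns into {src_multilingual * "s"}). A rough sketch of the same pattern; the dictionary keys and values are invented stand-ins, not taken from the converter:

model_dict = {"_name": "opus-mt-en-de", "release-date": "2020-08-21"}
src_multilingual = False
a2_src_tags = ["en"]

extra_markdown = f"""
### {model_dict["_name"]}
* source language code{src_multilingual * "s"}: {", ".join(a2_src_tags)}
* release date: {model_dict["release-date"]}
"""
print(extra_markdown)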

View File

@ -741,9 +741,9 @@ class MarianEncoder(MarianPreTrainedModel):
# check if head_mask has a correct number of layers specified if desired # check if head_mask has a correct number of layers specified if desired
if head_mask is not None: if head_mask is not None:
assert head_mask.size()[0] == ( assert head_mask.size()[0] == (len(self.layers)), (
len(self.layers) f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." )
for idx, encoder_layer in enumerate(self.layers): for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states: if output_hidden_states:
encoder_states = encoder_states + (hidden_states,) encoder_states = encoder_states + (hidden_states,)

View File

@ -339,7 +339,7 @@ class MarianTokenizer(PreTrainedTokenizer):
def __getstate__(self) -> Dict: def __getstate__(self) -> Dict:
state = self.__dict__.copy() state = self.__dict__.copy()
state.update( state.update(
{k: None for k in ["spm_source", "spm_target", "current_spm", "punc_normalizer", "target_vocab_file"]} dict.fromkeys(["spm_source", "spm_target", "current_spm", "punc_normalizer", "target_vocab_file"])
) )
return state return state
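
The `__getstate__` hunk above swaps a `{k: None for k in [...]}` comprehension for `dict.fromkeys(...)`; both build the same mapping with every key bound to None. A quick equivalence check as a sketch:

keys = ["spm_source", "spm_target", "current_spm", "punc_normalizer", "target_vocab_file"]
assert {k: None for k in keys} == dict.fromkeys(keys)  # dict.fromkeys defaults the value to None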

View File

@ -863,9 +863,9 @@ def test(
for original_model_feature, our_model_feature in zip( for original_model_feature, our_model_feature in zip(
original_model_backbone_features.values(), our_model_output.encoder_hidden_states original_model_backbone_features.values(), our_model_output.encoder_hidden_states
): ):
assert torch.allclose( assert torch.allclose(original_model_feature, our_model_feature, atol=tolerance), (
original_model_feature, our_model_feature, atol=tolerance "The backbone features are not the same."
), "The backbone features are not the same." )
# Test pixel decoder # Test pixel decoder
mask_features, _, multi_scale_features = original_model.sem_seg_head.pixel_decoder.forward_features( mask_features, _, multi_scale_features = original_model.sem_seg_head.pixel_decoder.forward_features(
@ -875,9 +875,9 @@ def test(
for original_model_feature, our_model_feature in zip( for original_model_feature, our_model_feature in zip(
multi_scale_features, our_model_output.pixel_decoder_hidden_states multi_scale_features, our_model_output.pixel_decoder_hidden_states
): ):
assert torch.allclose( assert torch.allclose(original_model_feature, our_model_feature, atol=tolerance), (
original_model_feature, our_model_feature, atol=tolerance "The pixel decoder feature are not the same"
), "The pixel decoder feature are not the same" )
# Let's test the full model # Let's test the full model
tr_complete = T.Compose( tr_complete = T.Compose(
@ -894,12 +894,12 @@ def test(
assert original_mask_logits.shape == our_mask_logits.shape, "Output masks shapes are not matching." assert original_mask_logits.shape == our_mask_logits.shape, "Output masks shapes are not matching."
assert original_class_logits.shape == our_class_logits.shape, "Output class logits shapes are not matching." assert original_class_logits.shape == our_class_logits.shape, "Output class logits shapes are not matching."
assert torch.allclose( assert torch.allclose(original_class_logits, our_class_logits, atol=tolerance), (
original_class_logits, our_class_logits, atol=tolerance "The class logits are not the same."
), "The class logits are not the same." )
assert torch.allclose( assert torch.allclose(original_mask_logits, our_mask_logits, atol=tolerance), (
original_mask_logits, our_mask_logits, atol=tolerance "The predicted masks are not the same."
), "The predicted masks are not the same." )
logger.info("✅ Test passed!") logger.info("✅ Test passed!")

View File

@ -581,9 +581,9 @@ def test(original_model, our_model: MaskFormerForInstanceSegmentation, image_pro
for original_model_feature, our_model_feature in zip( for original_model_feature, our_model_feature in zip(
original_model_backbone_features.values(), our_model_output.encoder_hidden_states original_model_backbone_features.values(), our_model_output.encoder_hidden_states
): ):
assert torch.allclose( assert torch.allclose(original_model_feature, our_model_feature, atol=1e-3), (
original_model_feature, our_model_feature, atol=1e-3 "The backbone features are not the same."
), "The backbone features are not the same." )
original_model_pixel_out = original_model.sem_seg_head.pixel_decoder.forward_features( original_model_pixel_out = original_model.sem_seg_head.pixel_decoder.forward_features(
original_model_backbone_features original_model_backbone_features
@ -602,9 +602,9 @@ def test(original_model, our_model: MaskFormerForInstanceSegmentation, image_pro
our_segmentation = image_processor.post_process_segmentation(our_model_out, target_size=(384, 384)) our_segmentation = image_processor.post_process_segmentation(our_model_out, target_size=(384, 384))
assert torch.allclose( assert torch.allclose(original_segmentation, our_segmentation, atol=1e-3), (
original_segmentation, our_segmentation, atol=1e-3 "The segmentation image is not the same."
), "The segmentation image is not the same." )
logger.info("✅ Test passed!") logger.info("✅ Test passed!")

View File

@ -144,9 +144,9 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
elif m_name == "kernel": elif m_name == "kernel":
array = np.transpose(array) array = np.transpose(array)
try: try:
assert ( assert pointer.shape == array.shape, (
pointer.shape == array.shape f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" )
except AssertionError as e: except AssertionError as e:
e.args += (pointer.shape, array.shape) e.args += (pointer.shape, array.shape)
raise raise

View File

@ -99,9 +99,9 @@ def get_mobilevitv2_config(task_name, orig_cfg_file):
orig_config = load_orig_config_file(orig_cfg_file) orig_config = load_orig_config_file(orig_cfg_file)
assert getattr(orig_config, "model.classification.name", -1) == "mobilevit_v2", "Invalid model" assert getattr(orig_config, "model.classification.name", -1) == "mobilevit_v2", "Invalid model"
config.width_multiplier = getattr(orig_config, "model.classification.mitv2.width_multiplier", 1.0) config.width_multiplier = getattr(orig_config, "model.classification.mitv2.width_multiplier", 1.0)
assert ( assert getattr(orig_config, "model.classification.mitv2.attn_norm_layer", -1) == "layer_norm_2d", (
getattr(orig_config, "model.classification.mitv2.attn_norm_layer", -1) == "layer_norm_2d" "Norm layers other than layer_norm_2d is not supported"
), "Norm layers other than layer_norm_2d is not supported" )
config.hidden_act = getattr(orig_config, "model.classification.activation.name", "swish") config.hidden_act = getattr(orig_config, "model.classification.activation.name", "swish")
# config.image_size == getattr(orig_config, 'sampler.bs.crop_size_width', 256) # config.image_size == getattr(orig_config, 'sampler.bs.crop_size_width', 256)
@ -184,7 +184,9 @@ def create_rename_keys(state_dict, base_model=False):
) )
if f"layer_{i}.1.conv_proj." in k: if f"layer_{i}.1.conv_proj." in k:
k_new = k_new.replace(f"layer_{i}.1.conv_proj.", f"{model_prefix}encoder.layer.{i-1}.conv_projection.") k_new = k_new.replace(
f"layer_{i}.1.conv_proj.", f"{model_prefix}encoder.layer.{i - 1}.conv_projection."
)
if "pre_norm_attn.0." in k: if "pre_norm_attn.0." in k:
k_new = k_new.replace("pre_norm_attn.0.", "layernorm_before.") k_new = k_new.replace("pre_norm_attn.0.", "layernorm_before.")

View File

@ -56,7 +56,7 @@ def _read_h5_weights(group, current_key="", weights={}):
def _convert_layer_names(name, gated_mlp=False): def _convert_layer_names(name, gated_mlp=False):
name = re.sub( name = re.sub(
r"layers\.functional(?:_(\d+))?\.layers", r"layers\.functional(?:_(\d+))?\.layers",
lambda m: f'layers.{m.group(1) if m.group(1) else "0"}', lambda m: f"layers.{m.group(1) if m.group(1) else '0'}",
name, name,
count=1, count=1,
) )
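
In the `_convert_layer_names` hunk above, the outer f-string flips to double quotes, so the literal nested inside the replacement field flips to single quotes; that keeps the expression valid on Python versions older than 3.12, which do not allow reusing the outer quote character inside a replacement field. A self-contained sketch with an invented layer name:

import re

name = "layers.functional_2.layers.dense"  # invented example input
converted = re.sub(
    r"layers\.functional(?:_(\d+))?\.layers",
    lambda m: f"layers.{m.group(1) if m.group(1) else '0'}",
    name,
    count=1,
)
print(converted)  # layers.2.dense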

View File

@ -719,9 +719,9 @@ def load_tf_weights_in_mt5(model, config, tf_checkpoint_path):
logger.info(f"Transposing numpy weight of shape {array.shape} for {name}") logger.info(f"Transposing numpy weight of shape {array.shape} for {name}")
array = np.transpose(array) array = np.transpose(array)
try: try:
assert ( assert pointer.shape == array.shape, (
pointer.shape == array.shape f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" )
except AssertionError as e: except AssertionError as e:
e.args += (pointer.shape, array.shape) e.args += (pointer.shape, array.shape)
raise raise

View File

@ -65,13 +65,13 @@ def get_args():
"--hf_input_path", "--hf_input_path",
type=str, type=str,
default=None, default=None,
help="A HF model path, " "e.g. a folder containing https://huggingface.co/nvidia/Minitron-8B-Base", help="A HF model path, e.g. a folder containing https://huggingface.co/nvidia/Minitron-8B-Base",
) )
parser.add_argument( parser.add_argument(
"--hf_output_path", "--hf_output_path",
type=str, type=str,
default=None, default=None,
help="Output HF model path, " "with the same format as above but user's own weights", help="Output HF model path, with the same format as above but user's own weights",
) )
parser.add_argument( parser.add_argument(
"--precision", "--precision",

View File

@ -91,7 +91,9 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig
) )
# Add the last block # Add the last block
save_path = os.path.join(dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts)+1:05d}-of-???.bin")) save_path = os.path.join(
dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
)
shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"] shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"]
remove_ignore_keys_(shared_weights) remove_ignore_keys_(shared_weights)
shared_weights = rename_fairseq_keys(shared_weights, None) shared_weights = rename_fairseq_keys(shared_weights, None)
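
The shard-saving hunk above is a plain line-length fix: the over-long `os.path.join(...)` call is wrapped across lines, and the expression inside the format spec gains spaces ({len(sharded_state_dicts)+1:05d} becomes {len(sharded_state_dicts) + 1:05d}). A runnable sketch with placeholder values:

import os

sharded_state_dicts = [{}, {}]    # placeholder: two shards already written
dump_path = "/tmp/nllb_moe_dump"  # placeholder output directory
weights_name = "pytorch_model.bin"

save_path = os.path.join(
    dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
)
print(save_path)  # /tmp/nllb_moe_dump/pytorch_model-00003-of-???.bin on POSIX paths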

View File

@ -1352,7 +1352,7 @@ class NllbMoeDecoder(NllbMoePreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning_once( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting" " `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False

View File

@ -5,7 +5,6 @@
# modular_olmo2.py file directly. One of our CI enforces this. # modular_olmo2.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig

View File

@ -1,7 +1,7 @@
from typing import Callable, Optional, Tuple from typing import Callable, Optional, Tuple
import torch import torch
from torch import nn import torch.nn as nn
from ...cache_utils import Cache from ...cache_utils import Cache
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
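
The olmo2 hunk above changes the import spelling from `from torch import nn` to `import torch.nn as nn`. Both forms bind the same module object; a short equivalence sketch (it assumes torch is installed):

import torch.nn as nn_via_import
from torch import nn as nn_via_from

assert nn_via_import is nn_via_from  # both names refer to the torch.nn module object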

View File

@ -1010,9 +1010,9 @@ def test(
for original_model_feature, our_model_feature in zip( for original_model_feature, our_model_feature in zip(
original_model_backbone_features.values(), our_model_output.encoder_hidden_states original_model_backbone_features.values(), our_model_output.encoder_hidden_states
): ):
assert torch.allclose( assert torch.allclose(original_model_feature, our_model_feature, atol=3e-3), (
original_model_feature, our_model_feature, atol=3e-3 "The backbone features are not the same."
), "The backbone features are not the same." )
mask_features, _, multi_scale_features, _, _ = original_model.sem_seg_head.pixel_decoder.forward_features( mask_features, _, multi_scale_features, _, _ = original_model.sem_seg_head.pixel_decoder.forward_features(
original_model_backbone_features original_model_backbone_features
) )
@ -1025,9 +1025,9 @@ def test(
for original_model_feature, our_model_feature in zip( for original_model_feature, our_model_feature in zip(
original_pixel_decoder_features, our_model_output.pixel_decoder_hidden_states original_pixel_decoder_features, our_model_output.pixel_decoder_hidden_states
): ):
assert torch.allclose( assert torch.allclose(original_model_feature, our_model_feature, atol=3e-4), (
original_model_feature, our_model_feature, atol=3e-4 "The pixel decoder feature are not the same"
), "The pixel decoder feature are not the same" )
tr_complete = T.Compose( tr_complete = T.Compose(
[ [
@ -1049,9 +1049,9 @@ def test(
our_segmentation = post_process_sem_seg_output(our_model_out, target_size=(640, 640))[0] our_segmentation = post_process_sem_seg_output(our_model_out, target_size=(640, 640))[0]
assert torch.allclose( assert torch.allclose(original_segmentation, our_segmentation, atol=1e-3), (
original_segmentation, our_segmentation, atol=1e-3 "The segmentation image is not the same."
), "The segmentation image is not the same." )
logger.info("✅ Test passed!") logger.info("✅ Test passed!")

View File

@ -62,9 +62,9 @@ class TFAttention(keras.layers.Layer):
n_state = nx # in Attention: n_state=768 (nx=n_embd) n_state = nx # in Attention: n_state=768 (nx=n_embd)
# [switch nx => n_state from Block to Attention to keep identical to TF implementation] # [switch nx => n_state from Block to Attention to keep identical to TF implementation]
assert ( assert n_state % config.n_head == 0, (
n_state % config.n_head == 0 f"Hidden dimension {n_state} not dividable by number of heads {config.n_head}"
), f"Hidden dimension {n_state} not dividable by number of heads {config.n_head}" )
self.n_head = config.n_head self.n_head = config.n_head
self.split_size = n_state self.split_size = n_state
self.scale = scale self.scale = scale

View File

@ -173,7 +173,7 @@ def _preprocess_resize_output_shape(image, output_shape):
# multichannel case: append shape of last axis # multichannel case: append shape of last axis
output_shape = output_shape + (image.shape[-1],) output_shape = output_shape + (image.shape[-1],)
elif output_ndim < image.ndim: elif output_ndim < image.ndim:
raise ValueError("output_shape length cannot be smaller than the " "image number of dimensions") raise ValueError("output_shape length cannot be smaller than the image number of dimensions")
return image, output_shape return image, output_shape
@ -345,10 +345,10 @@ class Owlv2ImageProcessor(BaseImageProcessor):
else: else:
anti_aliasing_sigma = np.atleast_1d(anti_aliasing_sigma) * np.ones_like(factors) anti_aliasing_sigma = np.atleast_1d(anti_aliasing_sigma) * np.ones_like(factors)
if np.any(anti_aliasing_sigma < 0): if np.any(anti_aliasing_sigma < 0):
raise ValueError("Anti-aliasing standard deviation must be " "greater than or equal to zero") raise ValueError("Anti-aliasing standard deviation must be greater than or equal to zero")
elif np.any((anti_aliasing_sigma > 0) & (factors <= 1)): elif np.any((anti_aliasing_sigma > 0) & (factors <= 1)):
warnings.warn( warnings.warn(
"Anti-aliasing standard deviation greater than zero but " "not down-sampling along all axes" "Anti-aliasing standard deviation greater than zero but not down-sampling along all axes"
) )
filtered = ndi.gaussian_filter(image, anti_aliasing_sigma, cval=cval, mode=ndi_mode) filtered = ndi.gaussian_filter(image, anti_aliasing_sigma, cval=cval, mode=ndi_mode)
else: else:

View File

@ -118,9 +118,9 @@ def convert_prophetnet_checkpoint_to_pytorch(prophetnet_checkpoint_path: str, py
is_key_init = True is_key_init = True
break break
elif attribute == "position_embeddings": elif attribute == "position_embeddings":
assert ( assert model.position_embeddings.weight.shape[-1] == old_model.embed_positions.weight.shape[-1], (
model.position_embeddings.weight.shape[-1] == old_model.embed_positions.weight.shape[-1] "Hidden size has to match"
), "Hidden size has to match" )
assert model.position_embeddings.weight.shape[0] == 512, "We want 512 position_embeddings." assert model.position_embeddings.weight.shape[0] == 512, "We want 512 position_embeddings."
model.position_embeddings.weight = nn.Parameter(old_model.embed_positions.weight[:512, :]) model.position_embeddings.weight = nn.Parameter(old_model.embed_positions.weight[:512, :])
is_key_init = True is_key_init = True

View File

@ -588,9 +588,9 @@ class ProphetNetPositionalEmbeddings(nn.Embedding):
super().__init__(config.max_position_embeddings, config.hidden_size, config.pad_token_id) super().__init__(config.max_position_embeddings, config.hidden_size, config.pad_token_id)
def forward(self, inputs_shape, device, attention_mask=None, past_key_values=None, position_ids=None): def forward(self, inputs_shape, device, attention_mask=None, past_key_values=None, position_ids=None):
assert (position_ids is None) or ( assert (position_ids is None) or (self.padding_idx is None), (
self.padding_idx is None "If position_ids is pre-computed then padding_idx should not be set."
), "If position_ids is pre-computed then padding_idx should not be set." )
if position_ids is None: if position_ids is None:
if past_key_values is not None: if past_key_values is not None:
@ -784,9 +784,9 @@ class ProphetNetNgramSelfAttention(nn.Module):
self.head_dim = config.hidden_size // self.num_attn_heads self.head_dim = config.hidden_size // self.num_attn_heads
self.ngram = config.ngram self.ngram = config.ngram
assert ( assert self.head_dim * self.num_attn_heads == config.hidden_size, (
self.head_dim * self.num_attn_heads == config.hidden_size "config.hidden_size must be divisible by num_attn_heads"
), "config.hidden_size must be divisible by num_attn_heads" )
# key, value, query projection # key, value, query projection
self.key_proj = nn.Linear(config.hidden_size, config.hidden_size) self.key_proj = nn.Linear(config.hidden_size, config.hidden_size)
self.value_proj = nn.Linear(config.hidden_size, config.hidden_size) self.value_proj = nn.Linear(config.hidden_size, config.hidden_size)
@ -1041,9 +1041,9 @@ class ProphetNetNgramSelfAttention(nn.Module):
if predict_relative_position_buckets is None: if predict_relative_position_buckets is None:
key_sequence_length = attn_weights.shape[-1] key_sequence_length = attn_weights.shape[-1]
assert ( assert position_ids[0][0] == key_sequence_length - 1, (
position_ids[0][0] == key_sequence_length - 1 "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)"
), "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)" )
relative_positions = ( relative_positions = (
torch.arange(0, key_sequence_length) torch.arange(0, key_sequence_length)
.unsqueeze(0) .unsqueeze(0)
@ -1313,9 +1313,9 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
# check if head_mask has a correct number of layers specified if desired # check if head_mask has a correct number of layers specified if desired
if head_mask is not None: if head_mask is not None:
assert head_mask.size()[0] == ( assert head_mask.size()[0] == (len(self.layers)), (
len(self.layers) f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." )
for idx, encoder_layer in enumerate(self.layers): for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states: if output_hidden_states:
encoder_hidden_states = encoder_hidden_states + (hidden_states,) encoder_hidden_states = encoder_hidden_states + (hidden_states,)
@ -1488,9 +1488,9 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
# prepare attention mask # prepare attention mask
if past_key_values is not None: if past_key_values is not None:
assert ( assert hidden_states.size(1) == 1, (
hidden_states.size(1) == 1 "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1"
), "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1" )
ngram_hidden_states = [ ngram_hidden_states = [
(ngram_embeddings[ngram - 1] + predicting_stream_pos_embed).repeat(batch_size, 1, 1) (ngram_embeddings[ngram - 1] + predicting_stream_pos_embed).repeat(batch_size, 1, 1)

View File

@ -162,7 +162,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path):
elif pvt_size == "large": elif pvt_size == "large":
config_path = "Zetatech/pvt-large-224" config_path = "Zetatech/pvt-large-224"
else: else:
raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but " f"'{pvt_size}' was given") raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given")
config = PvtConfig(name_or_path=config_path) config = PvtConfig(name_or_path=config_path)
# load original model from https://github.com/whai362/PVT # load original model from https://github.com/whai362/PVT
state_dict = torch.load(pvt_checkpoint, map_location="cpu") state_dict = torch.load(pvt_checkpoint, map_location="cpu")
@ -192,7 +192,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path):
elif pvt_size == "large": elif pvt_size == "large":
expected_slice_logits = torch.tensor([0.3740, -0.7739, -0.4214]) expected_slice_logits = torch.tensor([0.3740, -0.7739, -0.4214])
else: else:
raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but " f"'{pvt_size}' was given") raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given")
assert torch.allclose(logits[0, :3], expected_slice_logits, atol=1e-4) assert torch.allclose(logits[0, :3], expected_slice_logits, atol=1e-4)

View File

@ -203,8 +203,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde
config_path = "OpenGVLab/pvt_v2_b5" config_path = "OpenGVLab/pvt_v2_b5"
else: else:
raise ValueError( raise ValueError(
f"Available model sizes: 'b0', 'b1', 'b2', 'b2-linear', 'b3', 'b4', 'b5', but " f"Available model sizes: 'b0', 'b1', 'b2', 'b2-linear', 'b3', 'b4', 'b5', but '{pvt_v2_size}' was given"
f"'{pvt_v2_size}' was given"
) )
config = PvtV2Config.from_pretrained(config_path) config = PvtV2Config.from_pretrained(config_path)
# load original model from https://github.com/whai362/PVT # load original model from https://github.com/whai362/PVT
@ -248,9 +247,9 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde
f"'{pvt_v2_size}' was given" f"'{pvt_v2_size}' was given"
) )
assert torch.allclose( assert torch.allclose(logits[0, :3], expected_slice_logits, atol=1e-4), (
logits[0, :3], expected_slice_logits, atol=1e-4 "ImageNet weights not converted successfully."
), "ImageNet weights not converted successfully." )
print("ImageNet weights verified, conversion successful.") print("ImageNet weights verified, conversion successful.")

View File

@ -623,9 +623,9 @@ class Qwen2AudioEncoder(Qwen2AudioPreTrainedModel):
# check if head_mask has a correct number of layers specified if desired # check if head_mask has a correct number of layers specified if desired
if head_mask is not None: if head_mask is not None:
assert head_mask.size()[0] == ( assert head_mask.size()[0] == (len(self.layers)), (
len(self.layers) f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." )
for idx, encoder_layer in enumerate(self.layers): for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states: if output_hidden_states:

View File

@ -494,9 +494,9 @@ class RagModel(RagPreTrainedModel):
retriever: Optional[RagRetriever] = None, # or maybe just use a `set_retriever(...)` method retriever: Optional[RagRetriever] = None, # or maybe just use a `set_retriever(...)` method
**kwargs, **kwargs,
): ):
assert config is not None or ( assert config is not None or (question_encoder is not None and generator is not None), (
question_encoder is not None and generator is not None "Either a configuration or an question_encoder and a generator has to be provided."
), "Either a configuration or an question_encoder and a generator has to be provided." )
if config is None: if config is None:
config = RagConfig.from_question_encoder_generator_configs( config = RagConfig.from_question_encoder_generator_configs(
@ -517,9 +517,9 @@ class RagModel(RagPreTrainedModel):
self.retriever = retriever self.retriever = retriever
if self.retriever is not None: if self.retriever is not None:
assert isinstance( assert isinstance(retriever, RagRetriever), (
retriever, RagRetriever f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`"
), f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`" )
self.retriever = retriever self.retriever = retriever
self.question_encoder = question_encoder self.question_encoder = question_encoder
@ -660,9 +660,9 @@ class RagModel(RagPreTrainedModel):
" retriever using the `set_retriever(...)` function." " retriever using the `set_retriever(...)` function."
) )
assert ( assert doc_scores is not None, (
doc_scores is not None "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function."
), "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." )
assert (doc_scores.shape[1] % n_docs) == 0, ( assert (doc_scores.shape[1] % n_docs) == 0, (
f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is"
@ -740,9 +740,9 @@ class RagSequenceForGeneration(RagPreTrainedModel):
retriever: Optional[RagRetriever] = None, retriever: Optional[RagRetriever] = None,
**kwargs, **kwargs,
): ):
assert config is not None or ( assert config is not None or (question_encoder is not None and generator is not None), (
question_encoder is not None and generator is not None "Either a configuration or an encoder and a generator has to be provided."
), "Either a configuration or an encoder and a generator has to be provided." )
if config is None: if config is None:
config = RagConfig.from_question_encoder_generator_configs( config = RagConfig.from_question_encoder_generator_configs(
@ -973,9 +973,9 @@ class RagSequenceForGeneration(RagPreTrainedModel):
) )
num_beams = num_beams if num_beams is not None else self.config.num_beams num_beams = num_beams if num_beams is not None else self.config.num_beams
assert ( assert input_ids is not None or context_input_ids is not None, (
input_ids is not None or context_input_ids is not None " At least one of input_ids or context_input_ids must be given"
), " At least one of input_ids or context_input_ids must be given" )
if self.retriever is not None and context_input_ids is None: if self.retriever is not None and context_input_ids is None:
question_hidden_states = self.question_encoder(input_ids, attention_mask=attention_mask)[0] question_hidden_states = self.question_encoder(input_ids, attention_mask=attention_mask)[0]
@ -1138,9 +1138,9 @@ class RagTokenForGeneration(RagPreTrainedModel):
retriever: Optional[RagRetriever] = None, retriever: Optional[RagRetriever] = None,
**kwargs, **kwargs,
): ):
assert config is not None or ( assert config is not None or (question_encoder is not None and generator is not None), (
question_encoder is not None and generator is not None "Either a configuration or an encoder and a generator has to be provided."
), "Either a configuration or an encoder and a generator has to be provided." )
if config is None: if config is None:
config = RagConfig.from_question_encoder_generator_configs( config = RagConfig.from_question_encoder_generator_configs(

View File

@ -506,9 +506,9 @@ class TFRagModel(TFRagPreTrainedModel):
load_weight_prefix: Optional[str] = None, load_weight_prefix: Optional[str] = None,
**kwargs, **kwargs,
): ):
assert config is not None or ( assert config is not None or (question_encoder is not None and generator is not None), (
question_encoder is not None and generator is not None "Either a configuration or an question_encoder and a generator has to be provided."
), "Either a configuration or an question_encoder and a generator has to be provided." )
if config is None: if config is None:
config = RagConfig.from_question_encoder_generator_configs( config = RagConfig.from_question_encoder_generator_configs(
@ -533,9 +533,9 @@ class TFRagModel(TFRagPreTrainedModel):
self.retriever = retriever self.retriever = retriever
if self.retriever is not None: if self.retriever is not None:
assert isinstance( assert isinstance(retriever, RagRetriever), (
retriever, RagRetriever f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`"
), f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`" )
self.retriever = retriever self.retriever = retriever
self.question_encoder = question_encoder self.question_encoder = question_encoder
@ -589,9 +589,9 @@ class TFRagModel(TFRagPreTrainedModel):
>>> input_ids = input_dict["input_ids"] >>> input_ids = input_dict["input_ids"]
>>> outputs = model(input_ids) >>> outputs = model(input_ids)
```""" ```"""
assert ( assert "decoder_cached_states" not in kwargs, (
"decoder_cached_states" not in kwargs "Please use past_key_values to cache intermediate outputs"
), "Please use past_key_values to cache intermediate outputs" # from modeling_tf_bart.py ) # from modeling_tf_bart.py
# aliasing to minimize code changing # aliasing to minimize code changing
n_docs = n_docs if n_docs is not None else self.config.n_docs n_docs = n_docs if n_docs is not None else self.config.n_docs
@ -657,9 +657,9 @@ class TFRagModel(TFRagPreTrainedModel):
" retriever using the `set_retriever(...)` function." " retriever using the `set_retriever(...)` function."
) )
assert ( assert doc_scores is not None, (
doc_scores is not None "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function."
), "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." )
assert (doc_scores.shape[1] % n_docs) == 0, ( assert (doc_scores.shape[1] % n_docs) == 0, (
f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is"
@ -747,9 +747,9 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
retriever: Optional[RagRetriever] = None, retriever: Optional[RagRetriever] = None,
**kwargs, **kwargs,
): ):
assert config is not None or ( assert config is not None or (question_encoder is not None and generator is not None), (
question_encoder is not None and generator is not None "Either a configuration or an encoder and a generator has to be provided."
), "Either a configuration or an encoder and a generator has to be provided." )
if config is None: if config is None:
config = RagConfig.from_question_encoder_generator_configs( config = RagConfig.from_question_encoder_generator_configs(
@ -939,9 +939,9 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
>>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True) >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True)
```""" ```"""
assert ( assert "decoder_cached_states" not in kwargs, (
"decoder_cached_states" not in kwargs "Please use past_key_values to cache intermediate outputs"
), "Please use past_key_values to cache intermediate outputs" # from modeling_tf_bart.py ) # from modeling_tf_bart.py
do_marginalize = do_marginalize if do_marginalize else self.config.do_marginalize do_marginalize = do_marginalize if do_marginalize else self.config.do_marginalize
reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss
@ -1327,9 +1327,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
retriever: Optional[RagRetriever] = None, retriever: Optional[RagRetriever] = None,
**kwargs, **kwargs,
): ):
assert config is not None or ( assert config is not None or (question_encoder is not None and generator is not None), (
question_encoder is not None and generator is not None "Either a configuration or an encoder and a generator has to be provided."
), "Either a configuration or an encoder and a generator has to be provided." )
if config is None: if config is None:
config = RagConfig.from_question_encoder_generator_configs( config = RagConfig.from_question_encoder_generator_configs(
@ -1454,9 +1454,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
>>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True) >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True)
```""" ```"""
assert ( assert "decoder_cached_states" not in kwargs, (
"decoder_cached_states" not in kwargs "Please use past_key_values to cache intermediate outputs"
), "Please use past_key_values to cache intermediate outputs" # from modeling_tf_bart.py ) # from modeling_tf_bart.py
exclude_bos_score = exclude_bos_score if exclude_bos_score else self.config.exclude_bos_score exclude_bos_score = exclude_bos_score if exclude_bos_score else self.config.exclude_bos_score
reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss
@ -1663,9 +1663,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
) )
num_beams = num_beams if num_beams is not None else self.config.num_beams num_beams = num_beams if num_beams is not None else self.config.num_beams
assert ( assert input_ids is not None or context_input_ids is not None, (
input_ids is not None or context_input_ids is not None " At least one of input_ids or context_input_ids must be given"
), " At least one of input_ids or context_input_ids must be given" )
if self.retriever is not None and context_input_ids is None: if self.retriever is not None and context_input_ids is None:
question_hidden_states = self.question_encoder(input_ids, attention_mask=attention_mask)[0] question_hidden_states = self.question_encoder(input_ids, attention_mask=attention_mask)[0]

View File

@ -156,9 +156,9 @@ class LegacyIndex(Index):
) )
with open(resolved_meta_path, "rb") as metadata_file: with open(resolved_meta_path, "rb") as metadata_file:
self.index_id_to_db_id = pickle.load(metadata_file) self.index_id_to_db_id = pickle.load(metadata_file)
assert ( assert len(self.index_id_to_db_id) == self.index.ntotal, (
len(self.index_id_to_db_id) == self.index.ntotal "Deserialized index_id_to_db_id should match faiss index size"
), "Deserialized index_id_to_db_id should match faiss index size" )
def is_initialized(self): def is_initialized(self):
return self._index_initialized return self._index_initialized

View File

@ -150,15 +150,15 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
position_embeddings = torch_model_reformer.embeddings.position_embeddings position_embeddings = torch_model_reformer.embeddings.position_embeddings
for emb_idx in range(len(position_embeddings.weights)): for emb_idx in range(len(position_embeddings.weights)):
emb_weights = np.asarray(weights[3][emb_idx][0]) emb_weights = np.asarray(weights[3][emb_idx][0])
assert ( assert position_embeddings.weights[emb_idx].shape == emb_weights.shape, (
position_embeddings.weights[emb_idx].shape == emb_weights.shape f"{position_embeddings[emb_idx]} emb does not match"
), f"{position_embeddings[emb_idx]} emb does not match" )
position_embeddings.weights[emb_idx] = nn.Parameter(torch.tensor(emb_weights)) position_embeddings.weights[emb_idx] = nn.Parameter(torch.tensor(emb_weights))
trax_layer_weights = weights[5] trax_layer_weights = weights[5]
assert len(torch_model_reformer.encoder.layers) * 4 == len( assert len(torch_model_reformer.encoder.layers) * 4 == len(trax_layer_weights), (
trax_layer_weights "HF and trax model do not have the same number of layers"
), "HF and trax model do not have the same number of layers" )
for layer_idx, layer in enumerate(torch_model_reformer.encoder.layers): for layer_idx, layer in enumerate(torch_model_reformer.encoder.layers):
block_weights = trax_layer_weights[4 * layer_idx : 4 * (layer_idx + 1)] block_weights = trax_layer_weights[4 * layer_idx : 4 * (layer_idx + 1)]
set_block_weights_in_torch(block_weights, layer, hidden_size) set_block_weights_in_torch(block_weights, layer, hidden_size)

View File

@ -446,12 +446,12 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
# free memory # free memory
del hidden_states del hidden_states
assert ( assert query_key_vectors.shape[-1] == self.attention_head_size, (
query_key_vectors.shape[-1] == self.attention_head_size f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}."
), f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}." )
assert ( assert value_vectors.shape[-1] == self.attention_head_size, (
value_vectors.shape[-1] == self.attention_head_size f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}."
), f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." )
do_standard_self_attention = (sequence_length <= self.chunk_length) or ( do_standard_self_attention = (sequence_length <= self.chunk_length) or (
use_cache and past_buckets_states[1] is not None use_cache and past_buckets_states[1] is not None
@ -470,9 +470,9 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
# make sure buckets has correct shape for LSH attention # make sure buckets has correct shape for LSH attention
buckets = buckets.view(batch_size, self.num_attention_heads, num_hashes * sequence_length) buckets = buckets.view(batch_size, self.num_attention_heads, num_hashes * sequence_length)
assert ( assert int(buckets.shape[-1]) == num_hashes * sequence_length, (
int(buckets.shape[-1]) == num_hashes * sequence_length f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}"
), f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}" )
sorted_bucket_idx, undo_sorted_bucket_idx = self._get_sorted_bucket_idx_and_undo_sorted_bucket_idx( sorted_bucket_idx, undo_sorted_bucket_idx = self._get_sorted_bucket_idx_and_undo_sorted_bucket_idx(
sequence_length, buckets, num_hashes sequence_length, buckets, num_hashes
@ -612,18 +612,18 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
# We sample a different random rotation for each round of hashing to # We sample a different random rotation for each round of hashing to
# decrease the probability of hash misses. # decrease the probability of hash misses.
if isinstance(self.num_buckets, int): if isinstance(self.num_buckets, int):
assert ( assert self.num_buckets % 2 == 0, (
self.num_buckets % 2 == 0 f"There should be an even number of buckets, but `self.num_buckets`: {self.num_buckets}"
), f"There should be an even number of buckets, but `self.num_buckets`: {self.num_buckets}" )
rotation_size = self.num_buckets rotation_size = self.num_buckets
num_buckets = self.num_buckets num_buckets = self.num_buckets
else: else:
# Factorize the hash if self.num_buckets is a list or tuple # Factorize the hash if self.num_buckets is a list or tuple
rotation_size, num_buckets = 0, 1 rotation_size, num_buckets = 0, 1
for bucket_factor in self.num_buckets: for bucket_factor in self.num_buckets:
assert ( assert bucket_factor % 2 == 0, (
bucket_factor % 2 == 0 f"The number of buckets should be even, but `num_bucket`: {bucket_factor}"
), f"The number of buckets should be even, but `num_bucket`: {bucket_factor}" )
rotation_size = rotation_size + bucket_factor rotation_size = rotation_size + bucket_factor
num_buckets = num_buckets * bucket_factor num_buckets = num_buckets * bucket_factor
@ -1090,15 +1090,15 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
key_vectors = self._split_hidden_size_dim(key_vectors, self.num_attention_heads, self.attention_head_size) key_vectors = self._split_hidden_size_dim(key_vectors, self.num_attention_heads, self.attention_head_size)
value_vectors = self._split_hidden_size_dim(value_vectors, self.num_attention_heads, self.attention_head_size) value_vectors = self._split_hidden_size_dim(value_vectors, self.num_attention_heads, self.attention_head_size)
assert ( assert query_vectors.shape[-1] == self.attention_head_size, (
query_vectors.shape[-1] == self.attention_head_size f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}."
), f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}." )
assert ( assert key_vectors.shape[-1] == self.attention_head_size, (
key_vectors.shape[-1] == self.attention_head_size f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}."
), f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}." )
assert ( assert value_vectors.shape[-1] == self.attention_head_size, (
value_vectors.shape[-1] == self.attention_head_size f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}."
), f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." )
if self.chunk_length is None: if self.chunk_length is None:
assert self.num_chunks_before == 0 and self.num_chunks_after == 0, ( assert self.num_chunks_before == 0 and self.num_chunks_after == 0, (
@ -1976,9 +1976,9 @@ class ReformerModel(ReformerPreTrainedModel):
def __init__(self, config): def __init__(self, config):
super().__init__(config) super().__init__(config)
self.config = config self.config = config
assert ( assert self.config.num_hidden_layers > 0, (
self.config.num_hidden_layers > 0 "`config.attn_layers` is empty. Select at least one attn layer form ['lsh', 'local']"
), "`config.attn_layers` is empty. Select at least one attn layer form ['lsh', 'local']" )
self.embeddings = ReformerEmbeddings(config) self.embeddings = ReformerEmbeddings(config)
self.encoder = ReformerEncoder(config) self.encoder = ReformerEncoder(config)
@ -2039,9 +2039,9 @@ class ReformerModel(ReformerPreTrainedModel):
else: else:
raise ValueError("You have to specify either input_ids or inputs_embeds") raise ValueError("You have to specify either input_ids or inputs_embeds")
assert ( assert len(input_shape) == 2, (
len(input_shape) == 2 f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}"
), f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}" )
if past_buckets_states is not None: if past_buckets_states is not None:
assert not self.training, "`past_buckets_states` can only be used for inference, not for training`." assert not self.training, "`past_buckets_states` can only be used for inference, not for training`."

View File

@ -2869,7 +2869,7 @@ class SeamlessM4TForTextToText(SeamlessM4TPreTrainedModel, GenerationMixin):
if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in
{', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}"""
) )
# tgt_lang gets priority over decoder input ids # tgt_lang gets priority over decoder input ids
text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang)
@ -3140,7 +3140,7 @@ class SeamlessM4TForSpeechToText(SeamlessM4TPreTrainedModel, GenerationMixin):
if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in
{', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}"""
) )
# tgt_lang gets priority over decoder input ids # tgt_lang gets priority over decoder input ids
text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang)
@ -3407,7 +3407,7 @@ class SeamlessM4TForTextToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin):
elif tgt_lang not in lang_code_to_id: elif tgt_lang not in lang_code_to_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. f"""`tgt_lang={tgt_lang}` is not supported by this model.
Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4T supports Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4T supports
more languages for text translation than for speech synthesis.""" more languages for text translation than for speech synthesis."""
) )
@ -3736,7 +3736,7 @@ class SeamlessM4TForSpeechToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin):
elif tgt_lang not in lang_code_to_id: elif tgt_lang not in lang_code_to_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. f"""`tgt_lang={tgt_lang}` is not supported by this model.
Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4T supports Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4T supports
more languages for text translation than for speech synthesis.""" more languages for text translation than for speech synthesis."""
) )
@ -4151,7 +4151,7 @@ class SeamlessM4TModel(SeamlessM4TPreTrainedModel, GenerationMixin):
elif tgt_lang not in lang_code_to_id: elif tgt_lang not in lang_code_to_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. f"""`tgt_lang={tgt_lang}` is not supported by this model.
Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4T supports Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4T supports
more languages for text translation than for speech synthesis.""" more languages for text translation than for speech synthesis."""
) )

View File

@ -3149,7 +3149,7 @@ class SeamlessM4Tv2ForTextToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin):
if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in
{', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}"""
) )
# tgt_lang gets priority over decoder input ids # tgt_lang gets priority over decoder input ids
text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang)
@ -3430,7 +3430,7 @@ class SeamlessM4Tv2ForSpeechToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin
if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in
{', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}"""
) )
# tgt_lang gets priority over decoder input ids # tgt_lang gets priority over decoder input ids
text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang)
@ -3707,7 +3707,7 @@ class SeamlessM4Tv2ForTextToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMixin
elif tgt_lang not in lang_code_to_id: elif tgt_lang not in lang_code_to_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. f"""`tgt_lang={tgt_lang}` is not supported by this model.
Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports
more languages for text translation than for speech synthesis.""" more languages for text translation than for speech synthesis."""
) )
@ -4078,7 +4078,7 @@ class SeamlessM4Tv2ForSpeechToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMix
elif tgt_lang not in lang_code_to_id: elif tgt_lang not in lang_code_to_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. f"""`tgt_lang={tgt_lang}` is not supported by this model.
Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports
more languages for text translation than for speech synthesis.""" more languages for text translation than for speech synthesis."""
) )
@ -4539,7 +4539,7 @@ class SeamlessM4Tv2Model(SeamlessM4Tv2PreTrainedModel, GenerationMixin):
elif tgt_lang not in lang_code_to_id: elif tgt_lang not in lang_code_to_id:
raise ValueError( raise ValueError(
f"""`tgt_lang={tgt_lang}` is not supported by this model. f"""`tgt_lang={tgt_lang}` is not supported by this model.
Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports
more languages for text translation than for speech synthesis.""" more languages for text translation than for speech synthesis."""
) )

View File

@ -192,41 +192,41 @@ def load_adapter(full_name, value, adapter, unused_weights):
if "proj_ln" in full_name: if "proj_ln" in full_name:
# has to be layer norm # has to be layer norm
if "bias" in name: if "bias" in name:
assert ( assert value.shape == adapter.proj_layer_norm.bias.data.shape, (
value.shape == adapter.proj_layer_norm.bias.data.shape f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.bias.data.shape} was found."
), f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.bias.data.shape} was found." )
adapter.proj_layer_norm.bias.data = value adapter.proj_layer_norm.bias.data = value
logger.info(f"Adapter proj layer norm bias was initialized from {full_name}.") logger.info(f"Adapter proj layer norm bias was initialized from {full_name}.")
if "weight" in name: if "weight" in name:
assert ( assert value.shape == adapter.proj_layer_norm.weight.data.shape, (
value.shape == adapter.proj_layer_norm.weight.data.shape f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.weight.data.shape} was found."
), f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.weight.data.shape} was found." )
adapter.proj_layer_norm.weight.data = value adapter.proj_layer_norm.weight.data = value
else: else:
# has to be projection layer # has to be projection layer
if "bias" in name: if "bias" in name:
assert ( assert value.shape == adapter.proj.bias.data.shape, (
value.shape == adapter.proj.bias.data.shape f"{full_name} has size {value.shape}, but {adapter.proj.bias.data.shape} was found."
), f"{full_name} has size {value.shape}, but {adapter.proj.bias.data.shape} was found." )
adapter.proj.bias.data = value adapter.proj.bias.data = value
logger.info(f"Adapter proj layer bias was initialized from {full_name}.") logger.info(f"Adapter proj layer bias was initialized from {full_name}.")
if "weight" in name: if "weight" in name:
assert ( assert value.shape == adapter.proj.weight.data.shape, (
value.shape == adapter.proj.weight.data.shape f"{full_name} has size {value.shape}, but {adapter.proj.weight.data.shape} was found."
), f"{full_name} has size {value.shape}, but {adapter.proj.weight.data.shape} was found." )
adapter.proj.weight.data = value adapter.proj.weight.data = value
logger.info(f"Adapter proj layer weight was initialized from {full_name}.") logger.info(f"Adapter proj layer weight was initialized from {full_name}.")
elif isinstance(layer_id, int): elif isinstance(layer_id, int):
if "bias" in name: if "bias" in name:
assert ( assert value.shape == adapter.layers[layer_id].conv.bias.data.shape, (
value.shape == adapter.layers[layer_id].conv.bias.data.shape f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.bias.data.shape} was found."
), f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.bias.data.shape} was found." )
adapter.layers[layer_id].conv.bias.data = value adapter.layers[layer_id].conv.bias.data = value
logger.info(f"Adapter layer {layer_id} bias was initialized from {full_name}.") logger.info(f"Adapter layer {layer_id} bias was initialized from {full_name}.")
elif "weight" in name: elif "weight" in name:
assert ( assert value.shape == adapter.layers[layer_id].conv.weight.data.shape, (
value.shape == adapter.layers[layer_id].conv.weight.data.shape f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.weight.data.shape} was found."
), f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.weight.data.shape} was found." )
adapter.layers[layer_id].conv.weight.data = value adapter.layers[layer_id].conv.weight.data = value
logger.info(f"Adapter layer {layer_id} bias was initialized from {full_name}.") logger.info(f"Adapter layer {layer_id} bias was initialized from {full_name}.")
else: else:

View File

@ -774,9 +774,9 @@ class Speech2TextEncoder(Speech2TextPreTrainedModel):
# check if head_mask has a correct number of layers specified if desired # check if head_mask has a correct number of layers specified if desired
if head_mask is not None: if head_mask is not None:
assert head_mask.size()[0] == ( assert head_mask.size()[0] == (len(self.layers)), (
len(self.layers) f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." )
for idx, encoder_layer in enumerate(self.layers): for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states: if output_hidden_states:

View File

@@ -224,9 +224,9 @@ def convert_swin2sr_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to
         [[-0.5238, -0.5557, -0.6321], [-0.6016, -0.5903, -0.6391], [-0.6244, -0.6334, -0.6889]]
     )
-    assert (
-        outputs.reconstruction.shape == expected_shape
-    ), f"Shape of reconstruction should be {expected_shape}, but is {outputs.reconstruction.shape}"
+    assert outputs.reconstruction.shape == expected_shape, (
+        f"Shape of reconstruction should be {expected_shape}, but is {outputs.reconstruction.shape}"
+    )
     assert torch.allclose(outputs.reconstruction[0, 0, :3, :3], expected_slice, atol=1e-3)
     print("Looks ok!")

View File

@@ -116,7 +116,9 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, max_shard_size, dtype, w
     total_size += weight_size
     # Add the last block
-    save_path = os.path.join(dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts)+1:05d}-of-???.bin"))
+    save_path = os.path.join(
+        dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin")
+    )
     rename_and_save_block(current_block, save_path)
     sharded_state_dicts.append(current_block.keys())

View File

@@ -363,9 +363,9 @@ class TFT5Attention(keras.layers.Layer):
         real_seq_length = seq_length
         if past_key_value is not None:
-            assert (
-                len(past_key_value) == 2
-            ), f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
+            assert len(past_key_value) == 2, (
+                f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
+            )
             real_seq_length += shape_list(past_key_value[0])[2] if query_length is None else query_length
         key_length = real_seq_length if key_value_states is None else shape_list(key_value_states)[1]

View File

@@ -1284,9 +1284,9 @@ class TapasForQuestionAnswering(TapasPreTrainedModel):
                 aggregate_mask = None
             else:
                 if float_answer is not None:
-                    assert (
-                        labels.shape[0] == float_answer.shape[0]
-                    ), "Make sure the answers are a FloatTensor of shape (batch_size,)"
+                    assert labels.shape[0] == float_answer.shape[0], (
+                        "Make sure the answers are a FloatTensor of shape (batch_size,)"
+                    )
                     # <float32>[batch_size]
                     aggregate_mask = _calculate_aggregate_mask(
                         float_answer,
@@ -1336,9 +1336,9 @@ class TapasForQuestionAnswering(TapasPreTrainedModel):
                 if is_supervised:
                     # Note that `aggregate_mask` is None if the setting is supervised.
                     if aggregation_labels is not None:
-                        assert (
-                            labels.shape[0] == aggregation_labels.shape[0]
-                        ), "Make sure the aggregation labels are a LongTensor of shape (batch_size,)"
+                        assert labels.shape[0] == aggregation_labels.shape[0], (
+                            "Make sure the aggregation labels are a LongTensor of shape (batch_size,)"
+                        )
                         per_example_additional_loss = _calculate_aggregation_loss(
                             logits_aggregation,
                             aggregate_mask,

View File

@@ -1562,9 +1562,9 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
                 aggregate_mask = None
             else:
                 if float_answer is not None:
-                    assert (
-                        shape_list(labels)[0] == shape_list(float_answer)[0]
-                    ), "Make sure the answers are a FloatTensor of shape (batch_size,)"
+                    assert shape_list(labels)[0] == shape_list(float_answer)[0], (
+                        "Make sure the answers are a FloatTensor of shape (batch_size,)"
+                    )
                     # <float32>[batch_size]
                     aggregate_mask = _calculate_aggregate_mask(
                         float_answer,
@@ -1615,9 +1615,9 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
                 if is_supervised:
                     # Note that `aggregate_mask` is None if the setting is supervised.
                     if aggregation_labels is not None:
-                        assert (
-                            shape_list(labels)[0] == shape_list(aggregation_labels)[0]
-                        ), "Make sure the aggregation labels are a LongTensor of shape (batch_size,)"
+                        assert shape_list(labels)[0] == shape_list(aggregation_labels)[0], (
+                            "Make sure the aggregation labels are a LongTensor of shape (batch_size,)"
+                        )
                         per_example_additional_loss = _calculate_aggregation_loss(
                             logits_aggregation,
                             aggregate_mask,

View File

@@ -773,7 +773,7 @@ TVP_PROMPTER_CLASSES_MAPPING = {
 @add_start_docstrings(
-    "The bare Tvp Model transformer outputting BaseModelOutputWithPooling object without any specific head on" " top.",
+    "The bare Tvp Model transformer outputting BaseModelOutputWithPooling object without any specific head on top.",
     TVP_START_DOCSTRING,
 )
 class TvpModel(TvpPreTrainedModel):

View File

@@ -407,8 +407,7 @@ class UdopPatchEmbeddings(nn.Module):
         batch_size, num_channels, height, width = pixel_values.shape
         if height != self.image_size[0] or width != self.image_size[1]:
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model"
-                f" ({self.image_size[0]}*{self.image_size[1]})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})."
             )
         embeddings = self.proj(pixel_values)
         embeddings = embeddings.flatten(2).transpose(1, 2)

View File

@@ -84,9 +84,9 @@ def convert_visual_bert_checkpoint(checkpoint_path, pytorch_dump_folder_path):
     Copy/paste/tweak model's weights to our VisualBERT structure.
     """
-    assert (
-        checkpoint_path.split("/")[-1] in ACCEPTABLE_CHECKPOINTS
-    ), f"The checkpoint provided must be in {ACCEPTABLE_CHECKPOINTS}."
+    assert checkpoint_path.split("/")[-1] in ACCEPTABLE_CHECKPOINTS, (
+        f"The checkpoint provided must be in {ACCEPTABLE_CHECKPOINTS}."
+    )
     # Get Config
     if "pre" in checkpoint_path:

View File

@@ -72,8 +72,7 @@ class VivitTubeletEmbeddings(nn.Module):
         batch_size, num_frames, num_channels, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Image image size ({height}*{width}) doesn't match model"
-                f" ({self.image_size[0]}*{self.image_size[1]})."
+                f"Image image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})."
             )
         # permute to (batch_size, num_channels, num_frames, height, width)

View File

@@ -1043,9 +1043,9 @@ class WhisperEncoder(WhisperPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            assert head_mask.size()[0] == (len(self.layers)), (
+                f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:

View File

@@ -167,7 +167,7 @@ class XCLIPVisionEmbeddings(nn.Module):
         batch_size, _, height, width = pixel_values.shape
         if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
             raise ValueError(
-                f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})."
+                f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})."
             )
         target_dtype = self.patch_embedding.weight.dtype
         patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]

View File

@@ -601,8 +601,7 @@ class XGLMModel(XGLMPreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
                 logger.warning_once(
-                    "`use_cache = True` is incompatible with gradient checkpointing`. Setting `use_cache ="
-                    " False`..."
+                    "`use_cache = True` is incompatible with gradient checkpointing`. Setting `use_cache = False`..."
                 )
                 use_cache = False

View File

@@ -164,15 +164,15 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
             array = np.transpose(array)
         if isinstance(pointer, list):
             # Here we will split the TF weights
-            assert (
-                len(pointer) == array.shape[0]
-            ), f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched"
+            assert len(pointer) == array.shape[0], (
+                f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched"
+            )
             for i, p_i in enumerate(pointer):
                 arr_i = array[i, ...]
                 try:
-                    assert (
-                        p_i.shape == arr_i.shape
-                    ), f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched"
+                    assert p_i.shape == arr_i.shape, (
+                        f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched"
+                    )
                 except AssertionError as e:
                     e.args += (p_i.shape, arr_i.shape)
                     raise
@@ -180,9 +180,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
                 p_i.data = torch.from_numpy(arr_i)
         else:
             try:
-                assert (
-                    pointer.shape == array.shape
-                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+                assert pointer.shape == array.shape, (
+                    f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
+                )
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise

View File

@@ -203,9 +203,9 @@ class ZambaConfig(PretrainedConfig):
         self.layers_block_type = self._layers_block_type(num_hidden_layers, attn_layer_period, attn_layer_offset)
-        assert (
-            self.mamba_expand * self.hidden_size
-        ) % self.n_mamba_heads == 0, "`intermediate_size` should be divisible by `n_mamba_heads`."
+        assert (self.mamba_expand * self.hidden_size) % self.n_mamba_heads == 0, (
+            "`intermediate_size` should be divisible by `n_mamba_heads`."
+        )
         super().__init__(
             pad_token_id=pad_token_id,

View File

@@ -339,7 +339,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
             )
         if self.model_type == ModelType.VisionEncoderDecoder:
-            task_prompt = f'<s_docvqa><s_question>{input["question"]}</s_question><s_answer>'
+            task_prompt = f"<s_docvqa><s_question>{input['question']}</s_question><s_answer>"
             # Adapted from https://huggingface.co/spaces/nielsr/donut-docvqa/blob/main/app.py
             encoding = {
                 "inputs": image_features["pixel_values"],

View File

@@ -104,8 +104,7 @@ class FbgemmFp8HfQuantizer(HfQuantizer):
             )
         elif torch_dtype == torch.float16:
             raise ValueError(
-                "You cannot use FP8 with torch_dtype=torch.float16."
-                "We recommend you passing torch_dtype=torch.bfloat16"
+                "You cannot use FP8 with torch_dtype=torch.float16.We recommend you passing torch_dtype=torch.bfloat16"
             )
         return torch_dtype

View File

@@ -257,8 +257,7 @@ class TorchAoHfQuantizer(HfQuantizer):
     def is_serializable(self, safe_serialization=None) -> bool:
         if safe_serialization:
             logger.warning(
-                "torchao quantized model does not support safe serialization, "
-                "please set `safe_serialization` to False"
+                "torchao quantized model does not support safe serialization, please set `safe_serialization` to False"
             )
             return False
         _is_torchao_serializable = version.parse(importlib.metadata.version("huggingface_hub")) >= version.parse(

View File

@@ -868,7 +868,7 @@ class SpecialTokensMixin:
     def __init__(self, verbose=False, **kwargs):
         self._pad_token_type_id = 0
         self.verbose = verbose
-        self._special_tokens_map = {attr: None for attr in self.SPECIAL_TOKENS_ATTRIBUTES}
+        self._special_tokens_map = dict.fromkeys(self.SPECIAL_TOKENS_ATTRIBUTES)
         self._special_tokens_map["additional_special_tokens"] = []  # for BC where it defaults to empty list
         # We directly set the hidden value to allow initialization with special tokens
@@ -881,9 +881,9 @@ class SpecialTokensMixin:
             if key in self.SPECIAL_TOKENS_ATTRIBUTES:
                 if key == "additional_special_tokens":
                     assert isinstance(value, (list, tuple)), f"Value {value} is not a list or tuple"
-                    assert all(
-                        isinstance(t, (str, AddedToken)) for t in value
-                    ), "One of the tokens is not a string or an AddedToken"
+                    assert all(isinstance(t, (str, AddedToken)) for t in value), (
+                        "One of the tokens is not a string or an AddedToken"
+                    )
                     setattr(self, key, value)
                 elif isinstance(value, (str, AddedToken)):
                     setattr(self, key, value)
@@ -967,9 +967,9 @@ class SpecialTokensMixin:
                 logger.info(f"Assigning {value} to the {key} key of the tokenizer")
             if key == "additional_special_tokens":
-                assert isinstance(value, (list, tuple)) and all(
-                    isinstance(t, (str, AddedToken)) for t in value
-                ), f"Tokens {value} for key {key} should all be str or AddedToken instances"
+                assert isinstance(value, (list, tuple)) and all(isinstance(t, (str, AddedToken)) for t in value), (
+                    f"Tokens {value} for key {key} should all be str or AddedToken instances"
+                )
                 to_add = []
                 for token in value:
@@ -3379,9 +3379,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
             return BatchEncoding(encoded_inputs, tensor_type=return_tensors)
         batch_size = len(required_input)
-        assert all(
-            len(v) == batch_size for v in encoded_inputs.values()
-        ), "Some items in the output dictionary have a different batch size than others."
+        assert all(len(v) == batch_size for v in encoded_inputs.values()), (
+            "Some items in the output dictionary have a different batch size than others."
+        )
         if padding_strategy == PaddingStrategy.LONGEST:
             max_length = max(len(inputs) for inputs in required_input)

View File

@@ -749,12 +749,12 @@ class EarlyStoppingCallback(TrainerCallback, ExportableState):
                 "Using EarlyStoppingCallback without load_best_model_at_end=True. "
                 "Once training is finished, the best model will not be loaded automatically."
             )
-        assert (
-            args.metric_for_best_model is not None
-        ), "EarlyStoppingCallback requires metric_for_best_model to be defined"
-        assert (
-            args.eval_strategy != IntervalStrategy.NO
-        ), "EarlyStoppingCallback requires IntervalStrategy of steps or epoch"
+        assert args.metric_for_best_model is not None, (
+            "EarlyStoppingCallback requires metric_for_best_model to be defined"
+        )
+        assert args.eval_strategy != IntervalStrategy.NO, (
+            "EarlyStoppingCallback requires IntervalStrategy of steps or epoch"
+        )
     def on_evaluate(self, args, state, control, metrics, **kwargs):
         metric_to_check = args.metric_for_best_model

View File

@@ -121,9 +121,9 @@ def nested_concat(tensors, new_tensors, padding_index=-100):
     nested list/tuples/dict of tensors.
     """
     if not (isinstance(tensors, torch.Tensor) and isinstance(new_tensors, torch.Tensor)):
-        assert (
-            type(tensors) is type(new_tensors)
-        ), f"Expected `tensors` and `new_tensors` to have the same type but found {type(tensors)} and {type(new_tensors)}."
+        assert type(tensors) is type(new_tensors), (
+            f"Expected `tensors` and `new_tensors` to have the same type but found {type(tensors)} and {type(new_tensors)}."
+        )
     if isinstance(tensors, (list, tuple)):
         return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
     elif isinstance(tensors, torch.Tensor):
@@ -381,15 +381,15 @@ class SequentialDistributedSampler(Sampler):
         # add extra samples to make it evenly divisible
         indices += indices[: (self.total_size - len(indices))]
-        assert (
-            len(indices) == self.total_size
-        ), f"Indices length {len(indices)} and total size {self.total_size} mismatched"
+        assert len(indices) == self.total_size, (
+            f"Indices length {len(indices)} and total size {self.total_size} mismatched"
+        )
         # subsample
         indices = indices[self.rank * self.num_samples : (self.rank + 1) * self.num_samples]
-        assert (
-            len(indices) == self.num_samples
-        ), f"Indices length {len(indices)} and sample number {self.num_samples} mismatched"
+        assert len(indices) == self.num_samples, (
+            f"Indices length {len(indices)} and sample number {self.num_samples} mismatched"
+        )
         return iter(indices)
@@ -506,9 +506,9 @@ class DistributedTensorGatherer:
         if isinstance(arrays, (list, tuple)):
             result = [self._nested_set_tensors(x, y) for x, y in zip(storage, arrays)]
             return result[0][0], type(arrays)(r[1] for r in result)
-        assert (
-            arrays.shape[0] % self.world_size == 0
-        ), f"Arrays passed should all have a first dimension multiple of {self.world_size}, found {arrays.shape[0]}."
+        assert arrays.shape[0] % self.world_size == 0, (
+            f"Arrays passed should all have a first dimension multiple of {self.world_size}, found {arrays.shape[0]}."
+        )
         slice_len = arrays.shape[0] // self.world_size
         for i in range(self.world_size):

View File

@@ -412,7 +412,7 @@ def _compile_jinja_template(chat_template):
     if version.parse(jinja2.__version__) < version.parse("3.1.0"):
         raise ImportError(
-            "apply_chat_template requires jinja2>=3.1.0 to be installed. Your version is " f"{jinja2.__version__}."
+            f"apply_chat_template requires jinja2>=3.1.0 to be installed. Your version is {jinja2.__version__}."
         )
     def raise_exception(message):

View File

@@ -513,7 +513,9 @@ def cached_files(
         return None
     # Now we raise for missing entries
     revision_ = "main" if revision is None else revision
-    msg = f"a file named {missing_entries[0]}" if len(missing_entries) == 1 else f"files named {*missing_entries,}"
+    msg = (
+        f"a file named {missing_entries[0]}" if len(missing_entries) == 1 else f"files named {(*missing_entries,)}"
+    )
     raise EnvironmentError(
         f"{path_or_repo_id} does not appear to have {msg}. Checkout 'https://huggingface.co/{path_or_repo_id}/tree/{revision_}'"
         "for available files."