diff --git a/setup.py b/setup.py index e227d210f04..c7e8fd76365 100644 --- a/setup.py +++ b/setup.py @@ -162,7 +162,7 @@ _deps = [ "rhoknp>=1.1.0,<1.3.1", "rjieba", "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1", - "ruff==0.5.1", + "ruff==0.11.2", "sacrebleu>=1.4.12,<2.0.0", "sacremoses", "safetensors>=0.4.3", diff --git a/src/transformers/agents/tools.py b/src/transformers/agents/tools.py index acea6e7700f..f7ead9f2ebe 100644 --- a/src/transformers/agents/tools.py +++ b/src/transformers/agents/tools.py @@ -167,9 +167,9 @@ class Tool: ) for input_name, input_content in self.inputs.items(): assert isinstance(input_content, dict), f"Input '{input_name}' should be a dictionary." - assert ( - "type" in input_content and "description" in input_content - ), f"Input '{input_name}' should have keys 'type' and 'description', has only {list(input_content.keys())}." + assert "type" in input_content and "description" in input_content, ( + f"Input '{input_name}' should have keys 'type' and 'description', has only {list(input_content.keys())}." + ) if input_content["type"] not in authorized_types: raise Exception( f"Input '{input_name}': type '{input_content['type']}' is not an authorized value, should be one of {authorized_types}." diff --git a/src/transformers/commands/add_fast_image_processor.py b/src/transformers/commands/add_fast_image_processor.py index a78fc2a7cf2..e999532ee31 100644 --- a/src/transformers/commands/add_fast_image_processor.py +++ b/src/transformers/commands/add_fast_image_processor.py @@ -288,7 +288,7 @@ def add_fast_image_processor_to_dummy(fast_image_processor_name: str): if new_dummy_object not in content: if index_new != len(image_processor_names) - 1: # add the dummy object just before the next ImageProcessorFast - first_line = f"class {image_processor_names[index_new+1]}(metaclass=DummyObject):" + first_line = f"class {image_processor_names[index_new + 1]}(metaclass=DummyObject):" updated_content = content.replace(first_line, new_dummy_object + "\n\n" + first_line) else: # add the dummy object at the very end @@ -313,11 +313,9 @@ def add_fast_image_processor_to_doc(fast_image_processor_name: str, model_name: raise ValueError(f"No doc files found for {model_name}") base_doc_string = ( - f"## {fast_image_processor_name[:-4]}\n\n" f"[[autodoc]] {fast_image_processor_name[:-4]}\n" " - preprocess" - ) - fast_doc_string = ( - f"## {fast_image_processor_name}\n\n" f"[[autodoc]] {fast_image_processor_name}\n" " - preprocess" + f"## {fast_image_processor_name[:-4]}\n\n[[autodoc]] {fast_image_processor_name[:-4]}\n - preprocess" ) + fast_doc_string = f"## {fast_image_processor_name}\n\n[[autodoc]] {fast_image_processor_name}\n - preprocess" for doc_file in doc_files: with open(doc_file, "r", encoding="utf-8") as f: @@ -385,7 +383,7 @@ def add_fast_image_processor_to_tests(fast_image_processor_name: str, model_name # add the fast image processor to the imports base_import_string = f" from transformers import {fast_image_processor_name[:-4]}" fast_import_string = ( - " if is_torchvision_available():\n" f" from transformers import {fast_image_processor_name}" + f" if is_torchvision_available():\n from transformers import {fast_image_processor_name}" ) if fast_import_string not in updated_content: updated_content = updated_content.replace(base_import_string, base_import_string + "\n\n" + fast_import_string) @@ -546,17 +544,17 @@ def add_fast_image_processor_file( " # For an example of a fast image processor requiring more complex augmentations, see `LlavaNextImageProcessorFast`.\n\n" " # 
Default values should be checked against the slow image processor\n" " # None values left after checking can be removed\n" - f' resample = {default_args_dict.get("resample")}\n' - f' image_mean = {default_args_dict.get("image_mean")}\n' - f' image_std = {default_args_dict.get("image_std")}\n' - f' size = {default_args_dict.get("size")}\n' - f' default_to_square = {default_args_dict.get("default_to_square")}\n' - f' crop_size = {default_args_dict.get("crop_size")}\n' - f' do_resize = {default_args_dict.get("do_resize")}\n' - f' do_center_crop = {default_args_dict.get("do_center_crop")}\n' - f' do_rescale = {default_args_dict.get("do_rescale")}\n' - f' do_normalize = {default_args_dict.get("do_normalize")}\n' - f' do_convert_rgb = {default_args_dict.get("do_convert_rgb")}\n\n\n' + f" resample = {default_args_dict.get('resample')}\n" + f" image_mean = {default_args_dict.get('image_mean')}\n" + f" image_std = {default_args_dict.get('image_std')}\n" + f" size = {default_args_dict.get('size')}\n" + f" default_to_square = {default_args_dict.get('default_to_square')}\n" + f" crop_size = {default_args_dict.get('crop_size')}\n" + f" do_resize = {default_args_dict.get('do_resize')}\n" + f" do_center_crop = {default_args_dict.get('do_center_crop')}\n" + f" do_rescale = {default_args_dict.get('do_rescale')}\n" + f" do_normalize = {default_args_dict.get('do_normalize')}\n" + f" do_convert_rgb = {default_args_dict.get('do_convert_rgb')}\n\n\n" f'__all__ = ["{fast_image_processor_name}"]\n' ) diff --git a/src/transformers/convert_graph_to_onnx.py b/src/transformers/convert_graph_to_onnx.py index 4301716ebde..922ece8c0f4 100644 --- a/src/transformers/convert_graph_to_onnx.py +++ b/src/transformers/convert_graph_to_onnx.py @@ -189,7 +189,7 @@ def infer_shapes(nlp: Pipeline, framework: str) -> tuple[list[str], list[str], d raise ValueError(f"Unable to infer tensor axes ({len(tensor.shape)})") else: seq_axes = [dim for dim, shape in enumerate(tensor.shape) if shape == seq_len] - axes.update({dim: "sequence" for dim in seq_axes}) + axes.update(dict.fromkeys(seq_axes, "sequence")) print(f"Found {'input' if is_input else 'output'} {name} with shape: {axes}") return axes diff --git a/src/transformers/data/metrics/squad_metrics.py b/src/transformers/data/metrics/squad_metrics.py index 5d98a0bfcf1..f83c23bdeec 100644 --- a/src/transformers/data/metrics/squad_metrics.py +++ b/src/transformers/data/metrics/squad_metrics.py @@ -226,7 +226,7 @@ def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_probability_ no_answer_qids = [qas_id for qas_id, has_answer in qas_id_to_has_answer.items() if not has_answer] if no_answer_probs is None: - no_answer_probs = {k: 0.0 for k in preds} + no_answer_probs = dict.fromkeys(preds, 0.0) exact, f1 = get_raw_scores(examples, preds) diff --git a/src/transformers/data/processors/glue.py b/src/transformers/data/processors/glue.py index 6e95a666849..cbb4a70ab02 100644 --- a/src/transformers/data/processors/glue.py +++ b/src/transformers/data/processors/glue.py @@ -101,7 +101,7 @@ if is_tf_available(): return tf.data.Dataset.from_generator( gen, - ({k: tf.int32 for k in input_names}, label_type), + (dict.fromkeys(input_names, tf.int32), label_type), ({k: tf.TensorShape([None]) for k in input_names}, tf.TensorShape([])), ) diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 51682b6a706..0384d0c2943 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -68,7 
+68,7 @@ deps = { "rhoknp": "rhoknp>=1.1.0,<1.3.1", "rjieba": "rjieba", "rouge-score": "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1", - "ruff": "ruff==0.5.1", + "ruff": "ruff==0.11.2", "sacrebleu": "sacrebleu>=1.4.12,<2.0.0", "sacremoses": "sacremoses", "safetensors": "safetensors>=0.4.3", diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 16e69538b62..0a349f31855 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -2749,9 +2749,7 @@ class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor): ngram keys (batch_size, num_ngrams, depth). """ if len(ngrams.shape) != 3: - raise ValueError( - "Ngrams should be of shape (batch_size, num_ngrams, ngram_len), but" f" is {ngrams.shape}" - ) + raise ValueError(f"Ngrams should be of shape (batch_size, num_ngrams, ngram_len), but is {ngrams.shape}") if ngrams.shape[2] != self.ngram_len: raise ValueError( "Ngrams should be of shape (batch_size, num_ngrams, ngram_len)," @@ -2836,7 +2834,7 @@ class SynthIDTextWatermarkLogitsProcessor(LogitsProcessor): def _check_input_ids_shape(self, input_ids: torch.LongTensor): """Checks the shape of input ids.""" if len(input_ids.shape) != 2: - raise ValueError("Input ids should be of shape (batch_size, input_len), but is" f" {input_ids.shape}") + raise ValueError(f"Input ids should be of shape (batch_size, input_len), but is {input_ids.shape}") def compute_g_values(self, input_ids: torch.LongTensor) -> torch.LongTensor: """ diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 9f669e175f1..458342595d4 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -1678,7 +1678,7 @@ class GenerationMixin: if execution_device_map is None: return None elif len(execution_device_map) == 1 and "" in execution_device_map: - return {idx: execution_device_map[""] for idx in range(num_hidden_layers)} + return dict.fromkeys(range(num_hidden_layers), execution_device_map[""]) layer_device_map = {} for layer in execution_device_map: for idx in range(num_hidden_layers): diff --git a/src/transformers/integrations/hqq.py b/src/transformers/integrations/hqq.py index 162b365668a..4ff154ee205 100755 --- a/src/transformers/integrations/hqq.py +++ b/src/transformers/integrations/hqq.py @@ -106,11 +106,11 @@ def prepare_for_hqq_linear(model, quantization_config=None, modules_to_not_conve if any(key in linear_tags for key in quant_config.keys()): # If the user doesn't specify a key from get_linear_tags, the layer is not quantized via (key, None) - patch_params = {key: None for key in linear_tags} + patch_params = dict.fromkeys(linear_tags) patch_params.update(quant_config) else: # Same quant_config for all layers - patch_params = {k: quant_config for k in linear_tags} + patch_params = dict.fromkeys(linear_tags, quant_config) model, has_been_replaced = _prepare_for_hqq_linear( model, patch_params=patch_params, has_been_replaced=has_been_replaced diff --git a/src/transformers/integrations/tpu.py b/src/transformers/integrations/tpu.py index 29262789dc9..1f7b0df3cd3 100644 --- a/src/transformers/integrations/tpu.py +++ b/src/transformers/integrations/tpu.py @@ -21,9 +21,9 @@ def tpu_spmd_dataloader(dataloader: DataLoader): if is_torch_xla_available(): import torch_xla.distributed.parallel_loader as pl - assert isinstance( - dataloader, pl.MpDeviceLoader - ), "The dataloader must be a `torch_xla.distributed.parallel_loader.MpDeviceLoader`." 
+ assert isinstance(dataloader, pl.MpDeviceLoader), ( + "The dataloader must be a `torch_xla.distributed.parallel_loader.MpDeviceLoader`." + ) # This is to support PyTorch/XLA FSDP via SPMD. # Here we shard the input data's 0th dim across the fsdp axis. diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 5c439420148..2f903968320 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -154,7 +154,7 @@ def flax_shard_checkpoint(params, max_shard_size="10GB"): weight_map = {} shards = {} for idx, shard in enumerate(sharded_state_dicts): - shard_file = FLAX_WEIGHTS_NAME.replace(".msgpack", f"-{idx+1:05d}-of-{len(sharded_state_dicts):05d}.msgpack") + shard_file = FLAX_WEIGHTS_NAME.replace(".msgpack", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.msgpack") shards[shard_file] = shard for weight_name in shard.keys(): weight_map[weight_name] = shard_file diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index b6632978fe7..d569d97f855 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -701,7 +701,7 @@ def tf_shard_checkpoint(weights, max_shard_size="10GB", weights_name: str = TF2_ weight_map = {} shards = {} for idx, shard in enumerate(sharded_state_dicts): - shard_file = weights_name.replace(".h5", f"-{idx+1:05d}-of-{len(sharded_state_dicts):05d}.h5") + shard_file = weights_name.replace(".h5", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.h5") shard_file = shard_file.replace( ".safetensors", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.safetensors" ) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 8438865a0b0..be79f5b327c 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2509,9 +2509,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix total_decoder_name="", total_encoder_name="", ): - assert isinstance(decoder_pointer, nn.Module) and isinstance( - encoder_pointer, nn.Module - ), f"{decoder_pointer} and {encoder_pointer} have to be of type nn.Module" + assert isinstance(decoder_pointer, nn.Module) and isinstance(encoder_pointer, nn.Module), ( + f"{decoder_pointer} and {encoder_pointer} have to be of type nn.Module" + ) if hasattr(decoder_pointer, "weight"): assert hasattr(encoder_pointer, "weight") encoder_pointer.weight = decoder_pointer.weight @@ -2525,9 +2525,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix encoder_modules = encoder_pointer._modules decoder_modules = decoder_pointer._modules if len(decoder_modules) > 0: - assert ( - len(encoder_modules) > 0 - ), f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}" + assert len(encoder_modules) > 0, ( + f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}" + ) all_encoder_weights = {module_name + "/" + sub_name for sub_name in encoder_modules.keys()} encoder_layer_pos = 0 @@ -3571,7 +3571,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix f"Please upgrade accelerate with `pip install -U accelerate`" ) # init state_dict for this shard - shard_state_dict = {name: "" for name in shard} + shard_state_dict = dict.fromkeys(shard, "") for module_name in shard: # skip to collect this weight again if shard_state_dict.get(module_name) != "": @@ -4814,7 +4814,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, 
GenerationMixin, PushToHubMix param_device_map = expand_device_map(device_map, checkpoint_keys) str_dtype = str(dtype).replace("torch.", "") if dtype is not None else "float32" if sharded_metadata is None: - weight_map = {p: checkpoint_files[0] for p in checkpoint_keys} + weight_map = dict.fromkeys(checkpoint_keys, checkpoint_files[0]) else: folder = os.path.sep.join(checkpoint_files[0].split(os.path.sep)[:-1]) # Fix the weight map keys according to the key mapping @@ -5446,9 +5446,9 @@ class PoolerEndLogits(nn.Module): Returns: `torch.FloatTensor`: The end logits for SQuAD. """ - assert ( - start_states is not None or start_positions is not None - ), "One of start_states, start_positions should be not None" + assert start_states is not None or start_positions is not None, ( + "One of start_states, start_positions should be not None" + ) if start_positions is not None: slen, hsz = hidden_states.shape[-2:] start_positions = start_positions[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz) @@ -5514,9 +5514,9 @@ class PoolerAnswerClass(nn.Module): """ # No dependency on end_feature so that we can obtain one single `cls_logits` for each sample. hsz = hidden_states.shape[-1] - assert ( - start_states is not None or start_positions is not None - ), "One of start_states, start_positions should be not None" + assert start_states is not None or start_positions is not None, ( + "One of start_states, start_positions should be not None" + ) if start_positions is not None: start_positions = start_positions[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz) start_states = hidden_states.gather(-2, start_positions).squeeze(-2) # shape (bsz, hsz) diff --git a/src/transformers/models/altclip/modeling_altclip.py b/src/transformers/models/altclip/modeling_altclip.py index 7e39a5f0f11..8a6c845efac 100755 --- a/src/transformers/models/altclip/modeling_altclip.py +++ b/src/transformers/models/altclip/modeling_altclip.py @@ -1058,7 +1058,7 @@ class AltCLIPVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." 
) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/bark/convert_suno_to_hf.py b/src/transformers/models/bark/convert_suno_to_hf.py index af1b000a370..f8c8399cb61 100644 --- a/src/transformers/models/bark/convert_suno_to_hf.py +++ b/src/transformers/models/bark/convert_suno_to_hf.py @@ -150,7 +150,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"): model.load_state_dict(state_dict, strict=False) n_params = model.num_parameters(exclude_embeddings=True) val_loss = checkpoint["best_val_loss"].item() - logger.info(f"model loaded: {round(n_params/1e6,1)}M params, {round(val_loss,3)} loss") + logger.info(f"model loaded: {round(n_params / 1e6, 1)}M params, {round(val_loss, 3)} loss") model.eval() model.to(device) del checkpoint, state_dict diff --git a/src/transformers/models/bark/processing_bark.py b/src/transformers/models/bark/processing_bark.py index 5fa5cd19164..7d1bf211784 100644 --- a/src/transformers/models/bark/processing_bark.py +++ b/src/transformers/models/bark/processing_bark.py @@ -103,7 +103,7 @@ class BarkProcessor(ProcessorMixin): ) if speaker_embeddings_path is None: logger.warning( - f"""`{os.path.join(pretrained_processor_name_or_path,speaker_embeddings_dict_path)}` does not exists + f"""`{os.path.join(pretrained_processor_name_or_path, speaker_embeddings_dict_path)}` does not exists , no preloaded speaker embeddings will be used - Make sure to provide a correct path to the json dictionnary if wanted, otherwise set `speaker_embeddings_dict_path=None`.""" ) @@ -202,7 +202,7 @@ class BarkProcessor(ProcessorMixin): ) if path is None: raise ValueError( - f"""`{os.path.join(self.speaker_embeddings.get("repo_or_path", "/"),voice_preset_paths[key])}` does not exists + f"""`{os.path.join(self.speaker_embeddings.get("repo_or_path", "/"), voice_preset_paths[key])}` does not exists , no preloaded voice preset will be used - Make sure to provide correct paths to the {voice_preset} embeddings.""" ) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index 0d4338261ee..2ab7d9dfc15 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -329,7 +329,7 @@ class BridgeTowerVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." 
) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/chinese_clip/modeling_chinese_clip.py b/src/transformers/models/chinese_clip/modeling_chinese_clip.py index c9c19073b0e..018e9044bc5 100644 --- a/src/transformers/models/chinese_clip/modeling_chinese_clip.py +++ b/src/transformers/models/chinese_clip/modeling_chinese_clip.py @@ -234,7 +234,7 @@ class ChineseCLIPVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." ) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/clap/convert_clap_original_pytorch_to_hf.py b/src/transformers/models/clap/convert_clap_original_pytorch_to_hf.py index d422bc45ab3..66488e401a1 100644 --- a/src/transformers/models/clap/convert_clap_original_pytorch_to_hf.py +++ b/src/transformers/models/clap/convert_clap_original_pytorch_to_hf.py @@ -74,7 +74,7 @@ def rename_state_dict(state_dict): # replace sequential layers with list sequential_layer = re.match(sequential_layers_pattern, key).group(1) - key = key.replace(f"sequential.{sequential_layer}.", f"layers.{int(sequential_layer)//3}.linear.") + key = key.replace(f"sequential.{sequential_layer}.", f"layers.{int(sequential_layer) // 3}.linear.") elif re.match(text_projection_pattern, key): projecton_layer = int(re.match(text_projection_pattern, key).group(1)) diff --git a/src/transformers/models/clip/modeling_clip.py b/src/transformers/models/clip/modeling_clip.py index 472c7d40983..7898a125fa4 100644 --- a/src/transformers/models/clip/modeling_clip.py +++ b/src/transformers/models/clip/modeling_clip.py @@ -242,7 +242,7 @@ class CLIPVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." ) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/clipseg/modeling_clipseg.py b/src/transformers/models/clipseg/modeling_clipseg.py index 2d88746b771..3f363dd51f4 100644 --- a/src/transformers/models/clipseg/modeling_clipseg.py +++ b/src/transformers/models/clipseg/modeling_clipseg.py @@ -209,7 +209,7 @@ class CLIPSegVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." 
) patch_embeds = self.patch_embedding(pixel_values) # shape = [*, width, grid, grid] patch_embeds = patch_embeds.flatten(2).transpose(1, 2) diff --git a/src/transformers/models/clvp/configuration_clvp.py b/src/transformers/models/clvp/configuration_clvp.py index cffc962eb32..b06cd5f6a41 100644 --- a/src/transformers/models/clvp/configuration_clvp.py +++ b/src/transformers/models/clvp/configuration_clvp.py @@ -144,7 +144,7 @@ class ClvpEncoderConfig(PretrainedConfig): # this is to make sure that we can load only text or speech configs from the nested ClvpConfig. if config_type not in cls.base_config_key: raise ValueError( - f"We can only load either 'text_config' or 'speech_config' but you are trying to load" f"{config_type}" + f"We can only load either 'text_config' or 'speech_config' but you are trying to load{config_type}" ) # get the text config dict if we are loading from ClvpConfig diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py index e6db6427756..c9d4c34b86a 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py +++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py @@ -190,8 +190,8 @@ class CodeLlamaTokenizerFast(PreTrainedTokenizerFast): if eos is None and self.add_eos_token: raise ValueError("add_eos_token = True but eos_token = None") - single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}" - pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}" + single = f"{(bos + ':0 ') if self.add_bos_token else ''}$A:0{(' ' + eos + ':0') if self.add_eos_token else ''}" + pair = f"{single}{(' ' + bos + ':1') if self.add_bos_token else ''} $B:1{(' ' + eos + ':1') if self.add_eos_token else ''}" special_tokens = [] if self.add_bos_token: diff --git a/src/transformers/models/cohere/tokenization_cohere_fast.py b/src/transformers/models/cohere/tokenization_cohere_fast.py index 3570eb15080..c8b0f6d3fed 100644 --- a/src/transformers/models/cohere/tokenization_cohere_fast.py +++ b/src/transformers/models/cohere/tokenization_cohere_fast.py @@ -198,8 +198,8 @@ class CohereTokenizerFast(PreTrainedTokenizerFast): if eos is None and self.add_eos_token: raise ValueError("add_eos_token = True but eos_token = None") - single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}" - pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}" + single = f"{(bos + ':0 ') if self.add_bos_token else ''}$A:0{(' ' + eos + ':0') if self.add_eos_token else ''}" + pair = f"{single}{(' ' + bos + ':1') if self.add_bos_token else ''} $B:1{(' ' + eos + ':1') if self.add_eos_token else ''}" special_tokens = [] if self.add_bos_token: diff --git a/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py index 10b97dc93d0..93c9afe9f65 100644 --- a/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py @@ -127,9 +127,9 @@ def convert_data2vec_checkpoint_to_pytorch( # self-attention output self_output: BertSelfOutput = layer.attention.output - assert ( - self_output.dense.weight.shape == 
data2vec_layer.self_attn.out_proj.weight.shape - ), f"Shape for self_output.dense.weight should be {data2vec_layer.self_attn.out_proj.weight.shape}" + assert self_output.dense.weight.shape == data2vec_layer.self_attn.out_proj.weight.shape, ( + f"Shape for self_output.dense.weight should be {data2vec_layer.self_attn.out_proj.weight.shape}" + ) self_output.dense.weight = data2vec_layer.self_attn.out_proj.weight self_output.dense.bias = data2vec_layer.self_attn.out_proj.bias self_output.LayerNorm.weight = data2vec_layer.self_attn_layer_norm.weight @@ -137,17 +137,17 @@ def convert_data2vec_checkpoint_to_pytorch( # intermediate intermediate: BertIntermediate = layer.intermediate - assert ( - intermediate.dense.weight.shape == data2vec_layer.fc1.weight.shape - ), f"Shape for intermediate.dense.weight should be {data2vec_layer.fc1.weight.shape}" + assert intermediate.dense.weight.shape == data2vec_layer.fc1.weight.shape, ( + f"Shape for intermediate.dense.weight should be {data2vec_layer.fc1.weight.shape}" + ) intermediate.dense.weight = data2vec_layer.fc1.weight intermediate.dense.bias = data2vec_layer.fc1.bias # output bert_output: BertOutput = layer.output - assert ( - bert_output.dense.weight.shape == data2vec_layer.fc2.weight.shape - ), f"Shape for bert_output.dense.weight should be {data2vec_layer.fc2.weight.shape}" + assert bert_output.dense.weight.shape == data2vec_layer.fc2.weight.shape, ( + f"Shape for bert_output.dense.weight should be {data2vec_layer.fc2.weight.shape}" + ) bert_output.dense.weight = data2vec_layer.fc2.weight bert_output.dense.bias = data2vec_layer.fc2.bias bert_output.LayerNorm.weight = data2vec_layer.final_layer_norm.weight diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 71595b4a43c..56056c43138 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -1491,7 +1491,7 @@ class TFData2VecVisionFCNHead(keras.layers.Layer): kernel_size=kernel_size, padding="same", dilation=dilation, - name=f"conv_module_{i+2}", + name=f"conv_module_{i + 2}", ) ) if self.num_convs == 0: diff --git a/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py index e2f64e9c3cd..1f3d675e091 100644 --- a/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py @@ -180,9 +180,9 @@ def convert_bort_checkpoint_to_pytorch(bort_checkpoint_path: str, pytorch_dump_f gluon_param = to_torch(params[gluon_param]) shape_gluon = gluon_param.shape - assert ( - shape_hf == shape_gluon - ), f"The gluon parameter {gluon_param} has shape {shape_gluon}, but expects shape {shape_hf} for Transformers" + assert shape_hf == shape_gluon, ( + f"The gluon parameter {gluon_param} has shape {shape_gluon}, but expects shape {shape_hf} for Transformers" + ) return gluon_param diff --git a/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py index 58031bffba0..a8d5eac1e19 100644 --- a/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py +++ 
b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py @@ -427,7 +427,7 @@ class SubWordJapaneseTokenizer: ) keisen = "─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋╌╍╎╏═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿" blocks = "▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟" - self.content_trans1 = str.maketrans({k: "" for k in keisen + blocks}) + self.content_trans1 = str.maketrans(dict.fromkeys(keisen + blocks, "")) def __len__(self): return len(self.ids_to_tokens) diff --git a/src/transformers/models/deprecated/jukebox/convert_jukebox.py b/src/transformers/models/deprecated/jukebox/convert_jukebox.py index b56a25c57c7..960c8f6ff57 100644 --- a/src/transformers/models/deprecated/jukebox/convert_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/convert_jukebox.py @@ -200,7 +200,7 @@ def fix_jukebox_keys(state_dict, model_state_dict, key_prefix, mapping): # handle missmatched shape elif value.shape != model_state_dict[f"{key_prefix}.{key}"].shape: val = model_state_dict[f"{key_prefix}.{key}"] - print(f"{original_key}-> {key} : \nshape {val.shape} and { value.shape}, do not match") + print(f"{original_key}-> {key} : \nshape {val.shape} and {value.shape}, do not match") key = original_key mapping[key] = original_key diff --git a/src/transformers/models/deprecated/jukebox/modeling_jukebox.py b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py index 299757545e6..6aa077a001f 100755 --- a/src/transformers/models/deprecated/jukebox/modeling_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py @@ -2366,7 +2366,7 @@ class JukeboxModel(JukeboxPreTrainedModel): new_tokens = sample_tokens - previous_sampled_tokens.shape[1] logger.info( - f"Sampling {sample_tokens} tokens for [{start},{start+sample_tokens}]. Conditioning on" + f"Sampling {sample_tokens} tokens for [{start},{start + sample_tokens}]. 
Conditioning on" f" {conditioning_tokens} tokens" ) @@ -2390,7 +2390,7 @@ class JukeboxModel(JukeboxPreTrainedModel): name = ["Ancestral", "Primed"][music_tokens_i.shape[1] == 0] iterator.set_description( f"[prior level {level}] {name} Sampling {sample_tokens} tokens out of" - f" {self.total_length//prior.raw_to_tokens}", + f" {self.total_length // prior.raw_to_tokens}", refresh=True, ) tokens_i = prior.sample( diff --git a/src/transformers/models/deprecated/open_llama/configuration_open_llama.py b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py index e20c33f24a3..3a19fd24a4c 100644 --- a/src/transformers/models/deprecated/open_llama/configuration_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py @@ -154,7 +154,7 @@ class OpenLlamaConfig(PretrainedConfig): if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2: raise ValueError( - "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}" + f"`rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {self.rope_scaling}" ) rope_scaling_type = self.rope_scaling.get("type", None) rope_scaling_factor = self.rope_scaling.get("factor", None) diff --git a/src/transformers/models/deprecated/realm/modeling_realm.py b/src/transformers/models/deprecated/realm/modeling_realm.py index 67eb94c6c4e..d5bf922f42b 100644 --- a/src/transformers/models/deprecated/realm/modeling_realm.py +++ b/src/transformers/models/deprecated/realm/modeling_realm.py @@ -139,9 +139,9 @@ def load_tf_weights_in_realm(model, config, tf_checkpoint_path): elif m_name == "kernel": array = np.transpose(array) try: - assert ( - pointer.shape == array.shape - ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + assert pointer.shape == array.shape, ( + f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + ) except AssertionError as e: e.args += (pointer.shape, array.shape) raise diff --git a/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py index 8f1a8370933..a383e2937f6 100755 --- a/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py @@ -579,7 +579,7 @@ class Speech2Text2Decoder(Speech2Text2PreTrainedModel): if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache =" " False`..." + "`use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`..." ) use_cache = False diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py index 982995a43e1..3a0f9c5ca4b 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py @@ -1095,9 +1095,9 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc batch_size, sequence_length = shape_list(input_ids)[:2] else: batch_size, sequence_length = shape_list(inputs_embeds)[:2] - assert ( - self.config.pad_token_id is not None or batch_size == 1 - ), "Cannot handle batch sizes > 1 if no padding token is defined." 
+ assert self.config.pad_token_id is not None or batch_size == 1, ( + "Cannot handle batch sizes > 1 if no padding token is defined." + ) if not tf.is_tensor(sequence_lengths): in_logits = logits[0:batch_size, sequence_lengths] diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py index da7ce405802..cbab6f2108d 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py @@ -155,9 +155,9 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path): p_i.data = torch.from_numpy(arr_i) else: try: - assert ( - pointer.shape == array.shape - ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + assert pointer.shape == array.shape, ( + f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + ) except AssertionError as e: e.args += (pointer.shape, array.shape) raise @@ -1238,9 +1238,9 @@ class TransfoXLForSequenceClassification(TransfoXLPreTrainedModel): else: batch_size, sequence_length = inputs_embeds.shape[:2] - assert ( - self.config.pad_token_id is not None or batch_size == 1 - ), "Cannot handle batch sizes > 1 if no padding token is defined." + assert self.config.pad_token_id is not None or batch_size == 1, ( + "Cannot handle batch sizes > 1 if no padding token is defined." + ) if self.config.pad_token_id is None: sequence_lengths = -1 else: diff --git a/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py index e9e709af993..35c089599bd 100644 --- a/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py @@ -588,9 +588,9 @@ class XLMProphetNetPositionalEmbeddings(nn.Embedding): super().__init__(config.max_position_embeddings, config.hidden_size, config.pad_token_id) def forward(self, inputs_shape, device, attention_mask=None, past_key_values=None, position_ids=None): - assert (position_ids is None) or ( - self.padding_idx is None - ), "If position_ids is pre-computed then padding_idx should not be set." + assert (position_ids is None) or (self.padding_idx is None), ( + "If position_ids is pre-computed then padding_idx should not be set." + ) if position_ids is None: if past_key_values is not None: @@ -784,9 +784,9 @@ class XLMProphetNetNgramSelfAttention(nn.Module): self.head_dim = config.hidden_size // self.num_attn_heads self.ngram = config.ngram - assert ( - self.head_dim * self.num_attn_heads == config.hidden_size - ), "config.hidden_size must be divisible by num_attn_heads" + assert self.head_dim * self.num_attn_heads == config.hidden_size, ( + "config.hidden_size must be divisible by num_attn_heads" + ) # key, value, query projection self.key_proj = nn.Linear(config.hidden_size, config.hidden_size) self.value_proj = nn.Linear(config.hidden_size, config.hidden_size) @@ -1041,9 +1041,9 @@ class XLMProphetNetNgramSelfAttention(nn.Module): if predict_relative_position_buckets is None: key_sequence_length = attn_weights.shape[-1] - assert ( - position_ids[0][0] == key_sequence_length - 1 - ), "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)" + assert position_ids[0][0] == key_sequence_length - 1, ( + "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... 
(key_sequence_length - 1)" + ) relative_positions = ( torch.arange(0, key_sequence_length) .unsqueeze(0) @@ -1313,9 +1313,9 @@ class XLMProphetNetEncoder(XLMProphetNetPreTrainedModel): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: encoder_hidden_states = encoder_hidden_states + (hidden_states,) @@ -1488,9 +1488,9 @@ class XLMProphetNetDecoder(XLMProphetNetPreTrainedModel): # prepare attention mask if past_key_values is not None: - assert ( - hidden_states.size(1) == 1 - ), "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1" + assert hidden_states.size(1) == 1, ( + "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1" + ) ngram_hidden_states = [ (ngram_embeddings[ngram - 1] + predicting_stream_pos_embed).repeat(batch_size, 1, 1) diff --git a/src/transformers/models/depth_pro/configuration_depth_pro.py b/src/transformers/models/depth_pro/configuration_depth_pro.py index 36de741b704..64b58d55206 100644 --- a/src/transformers/models/depth_pro/configuration_depth_pro.py +++ b/src/transformers/models/depth_pro/configuration_depth_pro.py @@ -114,7 +114,7 @@ class DepthProConfig(PretrainedConfig): # scaled_images_ratios is sorted if scaled_images_ratios != sorted(scaled_images_ratios): raise ValueError( - f"Values in scaled_images_ratios={scaled_images_ratios} " "should be sorted from low to high" + f"Values in scaled_images_ratios={scaled_images_ratios} should be sorted from low to high" ) # scaled_images_ratios, scaled_images_overlap_ratios, scaled_images_feature_dims should be consistent diff --git a/src/transformers/models/distilbert/modeling_flax_distilbert.py b/src/transformers/models/distilbert/modeling_flax_distilbert.py index f1cf0faaed3..683cfc67ee7 100644 --- a/src/transformers/models/distilbert/modeling_flax_distilbert.py +++ b/src/transformers/models/distilbert/modeling_flax_distilbert.py @@ -275,9 +275,9 @@ class FlaxTransformerBlock(nn.Module): dtype: jnp.dtype = jnp.float32 # the dtype of the computation def setup(self): - assert ( - self.config.dim % self.config.n_heads == 0 - ), f"Hidden size {self.config.dim} not dividable by number of heads {self.config.n_heads}" + assert self.config.dim % self.config.n_heads == 0, ( + f"Hidden size {self.config.dim} not dividable by number of heads {self.config.n_heads}" + ) self.attention = FlaxMultiHeadSelfAttention(self.config, dtype=self.dtype) self.sa_layer_norm = nn.LayerNorm(epsilon=1e-12, dtype=self.dtype) diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 09b14b89c56..d0ee2f84835 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -269,9 +269,9 @@ class TFTransformerBlock(keras.layers.Layer): self.activation = config.activation self.output_attentions = config.output_attentions - assert ( - config.dim % config.n_heads == 0 - ), f"Hidden size {config.dim} not dividable by number of heads {config.n_heads}" + assert config.dim % config.n_heads == 0, ( + 
f"Hidden size {config.dim} not dividable by number of heads {config.n_heads}" + ) self.attention = TFMultiHeadSelfAttention(config, name="attention") self.sa_layer_norm = keras.layers.LayerNormalization(epsilon=1e-12, name="sa_layer_norm") diff --git a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py index c11345d1eb4..d24c2f01db4 100644 --- a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py @@ -137,7 +137,7 @@ if __name__ == "__main__": dest_dir = f"converted-{src_file.name}" if args.dest is None else args.dest dest_dir = Path(dest_dir) assert src_file.exists() - assert ( - args.type is not None - ), "Please specify the component type of the DPR model to convert: 'ctx_encoder', 'question_encoder' or 'reader'." + assert args.type is not None, ( + "Please specify the component type of the DPR model to convert: 'ctx_encoder', 'question_encoder' or 'reader'." + ) convert(args.type, src_file, dest_dir) diff --git a/src/transformers/models/dpr/tokenization_dpr_fast.py b/src/transformers/models/dpr/tokenization_dpr_fast.py index 026ba1a8907..f4e7c0fdcdb 100644 --- a/src/transformers/models/dpr/tokenization_dpr_fast.py +++ b/src/transformers/models/dpr/tokenization_dpr_fast.py @@ -170,9 +170,9 @@ class CustomDPRReaderTokenizerMixin: texts = texts if not isinstance(texts, str) else [texts] n_passages = len(titles) questions = questions if not isinstance(questions, str) else [questions] * n_passages - assert len(titles) == len( - texts - ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts." + assert len(titles) == len(texts), ( + f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts." 
+ ) encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"] encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"] encoded_inputs = { diff --git a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py index 16e4d71212b..d7dc6d104f4 100644 --- a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py +++ b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py @@ -119,7 +119,7 @@ def rename_key(name): if "refinenet" in name: layer_idx = int(name[len("neck.refinenet") : len("neck.refinenet") + 1]) # tricky here: we need to map 4 to 0, 3 to 1, 2 to 2 and 1 to 3 - name = name.replace(f"refinenet{layer_idx}", f"fusion_stage.layers.{abs(layer_idx-4)}") + name = name.replace(f"refinenet{layer_idx}", f"fusion_stage.layers.{abs(layer_idx - 4)}") if "out_conv" in name: name = name.replace("out_conv", "projection") if "resConfUnit1" in name: diff --git a/src/transformers/models/dpt/convert_dpt_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_to_pytorch.py index 489da9acd19..55e0a444e85 100644 --- a/src/transformers/models/dpt/convert_dpt_to_pytorch.py +++ b/src/transformers/models/dpt/convert_dpt_to_pytorch.py @@ -107,7 +107,7 @@ def rename_key(name): if "refinenet" in name: layer_idx = int(name[len("neck.refinenet") : len("neck.refinenet") + 1]) # tricky here: we need to map 4 to 0, 3 to 1, 2 to 2 and 1 to 3 - name = name.replace(f"refinenet{layer_idx}", f"fusion_stage.layers.{abs(layer_idx-4)}") + name = name.replace(f"refinenet{layer_idx}", f"fusion_stage.layers.{abs(layer_idx - 4)}") if "out_conv" in name: name = name.replace("out_conv", "projection") if "resConfUnit1" in name: diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index 1bf2455af6f..e2b279ca672 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -617,8 +617,7 @@ class EncodecModel(EncodecPreTrainedModel): bandwidth = self.config.target_bandwidths[0] if bandwidth not in self.config.target_bandwidths: raise ValueError( - f"This model doesn't support the bandwidth {bandwidth}. " - f"Select one of {self.config.target_bandwidths}." + f"This model doesn't support the bandwidth {bandwidth}. Select one of {self.config.target_bandwidths}." ) _, channels, input_length = input_values.shape diff --git a/src/transformers/models/esm/openfold_utils/residue_constants.py b/src/transformers/models/esm/openfold_utils/residue_constants.py index 200e0d421b8..b05a603fb29 100644 --- a/src/transformers/models/esm/openfold_utils/residue_constants.py +++ b/src/transformers/models/esm/openfold_utils/residue_constants.py @@ -399,13 +399,11 @@ def map_structure_with_atom_order(in_list: list, first_call: bool = True) -> lis @functools.lru_cache(maxsize=None) -def load_stereo_chemical_props() -> ( - Tuple[ - Mapping[str, List[Bond]], - Mapping[str, List[Bond]], - Mapping[str, List[BondAngle]], - ] -): +def load_stereo_chemical_props() -> Tuple[ + Mapping[str, List[Bond]], + Mapping[str, List[Bond]], + Mapping[str, List[BondAngle]], +]: """Load stereo_chemical_props.txt into a nice structure. 
Load literature values for bond lengths and bond angles and translate bond angles into the length of the opposite diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py index 94395bd2711..ca08cad4d28 100644 --- a/src/transformers/models/flava/modeling_flava.py +++ b/src/transformers/models/flava/modeling_flava.py @@ -1495,9 +1495,9 @@ class FlavaImageCodebookLayerGroup(nn.Module): blocks = OrderedDict() for i in range(num_blocks): if i == 0: - blocks[f"block_{i+1}"] = FlavaImageCodebookBlock(in_size, out_size, num_layers) + blocks[f"block_{i + 1}"] = FlavaImageCodebookBlock(in_size, out_size, num_layers) else: - blocks[f"block_{i+1}"] = FlavaImageCodebookBlock(out_size, out_size, num_layers) + blocks[f"block_{i + 1}"] = FlavaImageCodebookBlock(out_size, out_size, num_layers) if use_pool: blocks["pool"] = nn.MaxPool2d(kernel_size=2) diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 9961ea4c88f..fa5ec7fdda2 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -539,9 +539,9 @@ class FSMTEncoder(nn.Module): all_attentions = () if output_attentions else None # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: x = x.transpose(0, 1) # T x B x C -> B x T x C @@ -960,9 +960,9 @@ class Attention(nn.Module): attn_weights = nn.functional.softmax(attn_weights, dim=-1) if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_heads, - ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_heads,), ( + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + ) attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) diff --git a/src/transformers/models/funnel/configuration_funnel.py b/src/transformers/models/funnel/configuration_funnel.py index b164f286042..5dd1b09088b 100644 --- a/src/transformers/models/funnel/configuration_funnel.py +++ b/src/transformers/models/funnel/configuration_funnel.py @@ -113,9 +113,9 @@ class FunnelConfig(PretrainedConfig): self.vocab_size = vocab_size self.block_sizes = block_sizes self.block_repeats = [1] * len(block_sizes) if block_repeats is None else block_repeats - assert len(block_sizes) == len( - self.block_repeats - ), "`block_sizes` and `block_repeats` should have the same length." + assert len(block_sizes) == len(self.block_repeats), ( + "`block_sizes` and `block_repeats` should have the same length." 
+ ) self.num_decoder_layers = num_decoder_layers self.d_model = d_model self.n_head = n_head diff --git a/src/transformers/models/fuyu/configuration_fuyu.py b/src/transformers/models/fuyu/configuration_fuyu.py index 23c3d88a8ec..50a1975c763 100644 --- a/src/transformers/models/fuyu/configuration_fuyu.py +++ b/src/transformers/models/fuyu/configuration_fuyu.py @@ -195,7 +195,7 @@ class FuyuConfig(PretrainedConfig): if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2: raise ValueError( - "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}" + f"`rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {self.rope_scaling}" ) rope_scaling_type = self.rope_scaling.get("type", None) rope_scaling_factor = self.rope_scaling.get("factor", None) diff --git a/src/transformers/models/gemma/tokenization_gemma_fast.py b/src/transformers/models/gemma/tokenization_gemma_fast.py index 0e6f4a20b6d..cb15e47d30a 100644 --- a/src/transformers/models/gemma/tokenization_gemma_fast.py +++ b/src/transformers/models/gemma/tokenization_gemma_fast.py @@ -136,8 +136,8 @@ class GemmaTokenizerFast(PreTrainedTokenizerFast): if eos is None and self.add_eos_token: raise ValueError("add_eos_token = True but eos_token = None") - single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}" - pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}" + single = f"{(bos + ':0 ') if self.add_bos_token else ''}$A:0{(' ' + eos + ':0') if self.add_eos_token else ''}" + pair = f"{single}{(' ' + bos + ':1') if self.add_bos_token else ''} $B:1{(' ' + eos + ':1') if self.add_eos_token else ''}" special_tokens = [] if self.add_bos_token: diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py index b3a88545fa3..232370ee01b 100644 --- a/src/transformers/models/git/modeling_git.py +++ b/src/transformers/models/git/modeling_git.py @@ -683,7 +683,7 @@ class GitVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." 
) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/glpn/convert_glpn_to_pytorch.py b/src/transformers/models/glpn/convert_glpn_to_pytorch.py index e19ee938198..5d18c3b73a5 100644 --- a/src/transformers/models/glpn/convert_glpn_to_pytorch.py +++ b/src/transformers/models/glpn/convert_glpn_to_pytorch.py @@ -40,13 +40,13 @@ def rename_keys(state_dict): if "patch_embed" in key: # replace for example patch_embed1 by patch_embeddings.0 idx = key[key.find("patch_embed") + len("patch_embed")] - key = key.replace(f"patch_embed{idx}", f"patch_embeddings.{int(idx)-1}") + key = key.replace(f"patch_embed{idx}", f"patch_embeddings.{int(idx) - 1}") if "norm" in key: key = key.replace("norm", "layer_norm") if "glpn.encoder.layer_norm" in key: # replace for example layer_norm1 by layer_norm.0 idx = key[key.find("glpn.encoder.layer_norm") + len("glpn.encoder.layer_norm")] - key = key.replace(f"layer_norm{idx}", f"layer_norm.{int(idx)-1}") + key = key.replace(f"layer_norm{idx}", f"layer_norm.{int(idx) - 1}") if "layer_norm1" in key: key = key.replace("layer_norm1", "layer_norm_1") if "layer_norm2" in key: @@ -54,7 +54,7 @@ def rename_keys(state_dict): if "block" in key: # replace for example block1 by block.0 idx = key[key.find("block") + len("block")] - key = key.replace(f"block{idx}", f"block.{int(idx)-1}") + key = key.replace(f"block{idx}", f"block.{int(idx) - 1}") if "attn.q" in key: key = key.replace("attn.q", "attention.self.query") if "attn.proj" in key: @@ -73,7 +73,7 @@ def rename_keys(state_dict): if "linear_c" in key: # replace for example linear_c4 by linear_c.3 idx = key[key.find("linear_c") + len("linear_c")] - key = key.replace(f"linear_c{idx}", f"linear_c.{int(idx)-1}") + key = key.replace(f"linear_c{idx}", f"linear_c.{int(idx) - 1}") if "bot_conv" in key: key = key.replace("bot_conv", "0.convolution") if "skip_conv1" in key: diff --git a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py index d2ea1c3984f..767a8a68f63 100644 --- a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py +++ b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py @@ -154,8 +154,8 @@ class GPTNeoXTokenizerFast(PreTrainedTokenizerFast): if eos is None and self.add_eos_token: raise ValueError("add_eos_token = True but eos_token = None") - single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}" - pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}" + single = f"{(bos + ':0 ') if self.add_bos_token else ''}$A:0{(' ' + eos + ':0') if self.add_eos_token else ''}" + pair = f"{single}{(' ' + bos + ':1') if self.add_bos_token else ''} $B:1{(' ' + eos + ':1') if self.add_eos_token else ''}" special_tokens = [] if self.add_bos_token: diff --git a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py index 1030c93397e..19b0fd2375c 100644 --- a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py @@ -250,7 +250,7 @@ class SubWordJapaneseTokenizer: ) keisen = 
"─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋╌╍╎╏═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿" blocks = "▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟" - self.content_trans1 = str.maketrans({k: "" for k in keisen + blocks}) + self.content_trans1 = str.maketrans(dict.fromkeys(keisen + blocks, "")) def __len__(self): return len(self.ids_to_tokens) diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index 8971656365b..3550f639586 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -587,7 +587,7 @@ class TFHubertFeatureEncoder(keras.layers.Layer): if config.feat_extract_norm == "group": conv_layers = [TFHubertGroupNormConvLayer(config, layer_id=0, name=f"conv_layers.{0}")] + [ - TFHubertNoLayerNormConvLayer(config, layer_id=i + 1, name=f"conv_layers.{i+1}") + TFHubertNoLayerNormConvLayer(config, layer_id=i + 1, name=f"conv_layers.{i + 1}") for i in range(config.num_feat_extract_layers - 1) ] elif config.feat_extract_norm == "layer": diff --git a/src/transformers/models/ibert/quant_modules.py b/src/transformers/models/ibert/quant_modules.py index 8e2f123c578..d490d555a70 100644 --- a/src/transformers/models/ibert/quant_modules.py +++ b/src/transformers/models/ibert/quant_modules.py @@ -171,9 +171,9 @@ class QuantAct(nn.Module): x_min = x_act.data.min() x_max = x_act.data.max() - assert ( - x_max.isnan().sum() == 0 and x_min.isnan().sum() == 0 - ), "NaN detected when computing min/max of the activation" + assert x_max.isnan().sum() == 0 and x_min.isnan().sum() == 0, ( + "NaN detected when computing min/max of the activation" + ) # Initialization if self.x_min.min() > -1.1e-5 and self.x_max.max() < 1.1e-5: diff --git a/src/transformers/models/kosmos2/modeling_kosmos2.py b/src/transformers/models/kosmos2/modeling_kosmos2.py index 13c0273b172..c16aab776f1 100644 --- a/src/transformers/models/kosmos2/modeling_kosmos2.py +++ b/src/transformers/models/kosmos2/modeling_kosmos2.py @@ -451,7 +451,7 @@ class Kosmos2VisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." ) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/layoutxlm/processing_layoutxlm.py b/src/transformers/models/layoutxlm/processing_layoutxlm.py index a8881c634c2..892a7c2cf1d 100644 --- a/src/transformers/models/layoutxlm/processing_layoutxlm.py +++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py @@ -101,8 +101,7 @@ class LayoutXLMProcessor(ProcessorMixin): # verify input if self.image_processor.apply_ocr and (boxes is not None): raise ValueError( - "You cannot provide bounding boxes " - "if you initialized the image processor with apply_ocr set to True." + "You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True." 
) if self.image_processor.apply_ocr and (word_labels is not None): diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index e72ed197645..67eb44503f0 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -130,12 +130,12 @@ class LEDEncoderSelfAttention(nn.Module): self.layer_id = layer_id attention_window = config.attention_window[self.layer_id] - assert ( - attention_window % 2 == 0 - ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" - assert ( - attention_window > 0 - ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + assert attention_window % 2 == 0, ( + f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" + ) + assert attention_window > 0, ( + f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + ) self.one_sided_attn_window_size = attention_window // 2 @@ -169,9 +169,9 @@ class LEDEncoderSelfAttention(nn.Module): value_vectors = self.value(hidden_states) seq_len, batch_size, embed_dim = hidden_states.size() - assert ( - embed_dim == self.embed_dim - ), f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}" + assert embed_dim == self.embed_dim, ( + f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}" + ) # normalize query query_vectors /= math.sqrt(self.head_dim) @@ -239,9 +239,9 @@ class LEDEncoderSelfAttention(nn.Module): ) # use fp32 for numerical stability if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_heads, - ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_heads,), ( + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + ) attn_probs = layer_head_mask.view(1, 1, -1, 1) * attn_probs # softmax sometimes inserts NaN if all positions are masked, replace them with 0 @@ -433,9 +433,9 @@ class LEDEncoderSelfAttention(nn.Module): overlap of size window_overlap """ batch_size, seq_len, num_heads, head_dim = query.size() - assert ( - seq_len % (window_overlap * 2) == 0 - ), f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}" + assert seq_len % (window_overlap * 2) == 0, ( + f"Sequence length should be multiple of {window_overlap * 2}. 
Given {seq_len}" + ) assert query.size() == key.size() chunks_count = torch.div(seq_len, window_overlap, rounding_mode="trunc") - 1 @@ -706,9 +706,9 @@ class LEDEncoderSelfAttention(nn.Module): # apply layer head masking if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_heads, - ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_heads,), ( + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + ) global_attn_probs_float = layer_head_mask.view(1, -1, 1, 1) * global_attn_probs_float.view( batch_size, self.num_heads, max_num_global_attn_indices, seq_len ) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index ce94c504c4e..fe6c4a8986a 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -182,12 +182,12 @@ class TFLEDEncoderSelfAttention(keras.layers.Layer): self.layer_id = layer_id attention_window = config.attention_window[self.layer_id] - assert ( - attention_window % 2 == 0 - ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" - assert ( - attention_window > 0 - ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + assert attention_window % 2 == 0, ( + f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" + ) + assert attention_window > 0, ( + f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + ) self.one_sided_attn_window_size = attention_window // 2 diff --git a/src/transformers/models/llama/tokenization_llama_fast.py b/src/transformers/models/llama/tokenization_llama_fast.py index cb8b742ed41..417a2078d27 100644 --- a/src/transformers/models/llama/tokenization_llama_fast.py +++ b/src/transformers/models/llama/tokenization_llama_fast.py @@ -192,8 +192,8 @@ class LlamaTokenizerFast(PreTrainedTokenizerFast): if eos is None and self.add_eos_token: raise ValueError("add_eos_token = True but eos_token = None") - single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}" - pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}" + single = f"{(bos + ':0 ') if self.add_bos_token else ''}$A:0{(' ' + eos + ':0') if self.add_eos_token else ''}" + pair = f"{single}{(' ' + bos + ':1') if self.add_bos_token else ''} $B:1{(' ' + eos + ':1') if self.add_eos_token else ''}" special_tokens = [] if self.add_bos_token: diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index ebdba5c4ed2..ca87b37c650 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -510,12 +510,12 @@ class LongformerSelfAttention(nn.Module): self.layer_id = layer_id attention_window = config.attention_window[self.layer_id] - assert ( - attention_window % 2 == 0 - ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" - assert ( - attention_window > 0 - ), f"`attention_window` for layer {self.layer_id} has to be positive. 
Given {attention_window}" + assert attention_window % 2 == 0, ( + f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" + ) + assert attention_window > 0, ( + f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + ) self.one_sided_attn_window_size = attention_window // 2 @@ -549,9 +549,9 @@ class LongformerSelfAttention(nn.Module): value_vectors = self.value(hidden_states) seq_len, batch_size, embed_dim = hidden_states.size() - assert ( - embed_dim == self.embed_dim - ), f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}" + assert embed_dim == self.embed_dim, ( + f"hidden_states should have embed_dim = {self.embed_dim}, but has {embed_dim}" + ) # normalize query query_vectors /= math.sqrt(self.head_dim) @@ -619,9 +619,9 @@ class LongformerSelfAttention(nn.Module): ) # use fp32 for numerical stability if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_heads, - ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_heads,), ( + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + ) attn_probs = layer_head_mask.view(1, 1, -1, 1) * attn_probs # softmax sometimes inserts NaN if all positions are masked, replace them with 0 @@ -813,9 +813,9 @@ class LongformerSelfAttention(nn.Module): overlap of size window_overlap """ batch_size, seq_len, num_heads, head_dim = query.size() - assert ( - seq_len % (window_overlap * 2) == 0 - ), f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}" + assert seq_len % (window_overlap * 2) == 0, ( + f"Sequence length should be multiple of {window_overlap * 2}. Given {seq_len}" + ) assert query.size() == key.size() chunks_count = torch.div(seq_len, window_overlap, rounding_mode="trunc") - 1 @@ -1086,9 +1086,9 @@ class LongformerSelfAttention(nn.Module): # apply layer head masking if layer_head_mask is not None: - assert layer_head_mask.size() == ( - self.num_heads, - ), f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + assert layer_head_mask.size() == (self.num_heads,), ( + f"Head mask for a single layer should be of size {(self.num_heads,)}, but is {layer_head_mask.size()}" + ) global_attn_probs_float = layer_head_mask.view(1, -1, 1, 1) * global_attn_probs_float.view( batch_size, self.num_heads, max_num_global_attn_indices, seq_len ) @@ -1287,9 +1287,9 @@ class LongformerEncoder(nn.Module): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layer) - ), f"The head_mask should be specified for {len(self.layer)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layer)), ( + f"The head_mask should be specified for {len(self.layer)} layers, but it is for {head_mask.size()[0]}." 
+ ) for idx, layer_module in enumerate(self.layer): if output_hidden_states: all_hidden_states = all_hidden_states + (hidden_states,) @@ -1590,8 +1590,7 @@ class LongformerModel(LongformerPreTrainedModel): # this path should be recorded in the ONNX export, it is fine with padding_len == 0 as well if padding_len > 0: logger.warning_once( - f"Input ids are automatically padded to be a multiple of " - f"`config.attention_window`: {attention_window}" + f"Input ids are automatically padded to be a multiple of `config.attention_window`: {attention_window}" ) if input_ids is not None: input_ids = nn.functional.pad(input_ids, (0, padding_len), value=pad_token_id) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 0f52ca658a7..41eb0ae34ba 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -746,12 +746,12 @@ class TFLongformerSelfAttention(keras.layers.Layer): self.layer_id = layer_id attention_window = config.attention_window[self.layer_id] - assert ( - attention_window % 2 == 0 - ), f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" - assert ( - attention_window > 0 - ), f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + assert attention_window % 2 == 0, ( + f"`attention_window` for layer {self.layer_id} has to be an even value. Given {attention_window}" + ) + assert attention_window > 0, ( + f"`attention_window` for layer {self.layer_id} has to be positive. Given {attention_window}" + ) self.one_sided_attn_window_size = attention_window // 2 diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py index 8b6553b5292..63a38791182 100755 --- a/src/transformers/models/m2m_100/modeling_m2m_100.py +++ b/src/transformers/models/m2m_100/modeling_m2m_100.py @@ -1294,7 +1294,7 @@ class M2M100Decoder(M2M100PreTrainedModel): if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting" " `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." 
) use_cache = False diff --git a/src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py b/src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py index 40ad3294097..6181e94c60a 100644 --- a/src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py +++ b/src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py @@ -228,7 +228,7 @@ class TatoebaConverter: # combine with Tatoeba markdown readme_url = f"{TATOEBA_MODELS_URL}/{model_dict['_name']}/README.md" extra_markdown = f""" -### {model_dict['_name']} +### {model_dict["_name"]} * source language name: {self.tag2name[a3_src]} * target language name: {self.tag2name[a3_tgt]} @@ -237,12 +237,12 @@ class TatoebaConverter: content = ( f""" -* model: {model_dict['modeltype']} -* source language code{src_multilingual*'s'}: {', '.join(a2_src_tags)} -* target language code{tgt_multilingual*'s'}: {', '.join(a2_tgt_tags)} +* model: {model_dict["modeltype"]} +* source language code{src_multilingual * "s"}: {", ".join(a2_src_tags)} +* target language code{tgt_multilingual * "s"}: {", ".join(a2_tgt_tags)} * dataset: opus {backtranslated_data} -* release date: {model_dict['release-date']} -* pre-processing: {model_dict['pre-processing']} +* release date: {model_dict["release-date"]} +* pre-processing: {model_dict["pre-processing"]} """ + multilingual_data + tuned diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index b64970e8063..f68dceee915 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -741,9 +741,9 @@ class MarianEncoder(MarianPreTrainedModel): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." 
+ ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: encoder_states = encoder_states + (hidden_states,) diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py index c401d38f086..b1fd6463c68 100644 --- a/src/transformers/models/marian/tokenization_marian.py +++ b/src/transformers/models/marian/tokenization_marian.py @@ -339,7 +339,7 @@ class MarianTokenizer(PreTrainedTokenizer): def __getstate__(self) -> Dict: state = self.__dict__.copy() state.update( - {k: None for k in ["spm_source", "spm_target", "current_spm", "punc_normalizer", "target_vocab_file"]} + dict.fromkeys(["spm_source", "spm_target", "current_spm", "punc_normalizer", "target_vocab_file"]) ) return state diff --git a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py index ea1c578509f..3fd28acad39 100644 --- a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py @@ -523,11 +523,11 @@ class OriginalMask2FormerCheckpointToOursConverter: [ ( f"{src_prefix}.norm{layer_idx}.weight", - f"{dst_prefix}.hidden_states_norms.stage{layer_idx+1}.weight", + f"{dst_prefix}.hidden_states_norms.stage{layer_idx + 1}.weight", ), ( f"{src_prefix}.norm{layer_idx}.bias", - f"{dst_prefix}.hidden_states_norms.stage{layer_idx+1}.bias", + f"{dst_prefix}.hidden_states_norms.stage{layer_idx + 1}.bias", ), ] ) @@ -863,9 +863,9 @@ def test( for original_model_feature, our_model_feature in zip( original_model_backbone_features.values(), our_model_output.encoder_hidden_states ): - assert torch.allclose( - original_model_feature, our_model_feature, atol=tolerance - ), "The backbone features are not the same." + assert torch.allclose(original_model_feature, our_model_feature, atol=tolerance), ( + "The backbone features are not the same." + ) # Test pixel decoder mask_features, _, multi_scale_features = original_model.sem_seg_head.pixel_decoder.forward_features( @@ -875,9 +875,9 @@ def test( for original_model_feature, our_model_feature in zip( multi_scale_features, our_model_output.pixel_decoder_hidden_states ): - assert torch.allclose( - original_model_feature, our_model_feature, atol=tolerance - ), "The pixel decoder feature are not the same" + assert torch.allclose(original_model_feature, our_model_feature, atol=tolerance), ( + "The pixel decoder feature are not the same" + ) # Let's test the full model tr_complete = T.Compose( @@ -894,12 +894,12 @@ def test( assert original_mask_logits.shape == our_mask_logits.shape, "Output masks shapes are not matching." assert original_class_logits.shape == our_class_logits.shape, "Output class logits shapes are not matching." - assert torch.allclose( - original_class_logits, our_class_logits, atol=tolerance - ), "The class logits are not the same." - assert torch.allclose( - original_mask_logits, our_mask_logits, atol=tolerance - ), "The predicted masks are not the same." + assert torch.allclose(original_class_logits, our_class_logits, atol=tolerance), ( + "The class logits are not the same." + ) + assert torch.allclose(original_mask_logits, our_mask_logits, atol=tolerance), ( + "The predicted masks are not the same." 
+ ) logger.info("✅ Test passed!") diff --git a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py index 8b73c682455..b55b0e87148 100644 --- a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py @@ -581,9 +581,9 @@ def test(original_model, our_model: MaskFormerForInstanceSegmentation, image_pro for original_model_feature, our_model_feature in zip( original_model_backbone_features.values(), our_model_output.encoder_hidden_states ): - assert torch.allclose( - original_model_feature, our_model_feature, atol=1e-3 - ), "The backbone features are not the same." + assert torch.allclose(original_model_feature, our_model_feature, atol=1e-3), ( + "The backbone features are not the same." + ) original_model_pixel_out = original_model.sem_seg_head.pixel_decoder.forward_features( original_model_backbone_features @@ -602,9 +602,9 @@ def test(original_model, our_model: MaskFormerForInstanceSegmentation, image_pro our_segmentation = image_processor.post_process_segmentation(our_model_out, target_size=(384, 384)) - assert torch.allclose( - original_segmentation, our_segmentation, atol=1e-3 - ), "The segmentation image is not the same." + assert torch.allclose(original_segmentation, our_segmentation, atol=1e-3), ( + "The segmentation image is not the same." + ) logger.info("✅ Test passed!") diff --git a/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py index 3ca9d9dfc3d..79ef4917e9d 100644 --- a/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py @@ -125,31 +125,31 @@ def create_rename_keys(config): for i in range(3): rename_keys.append( ( - f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i+1}.weight", + f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i + 1}.weight", f"model.pixel_level_module.encoder.encoder.stages.{stage_idx}.layers.{layer_idx}.layer.{i}.convolution.weight", ) ) rename_keys.append( ( - f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i+1}.norm.weight", + f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i + 1}.norm.weight", f"model.pixel_level_module.encoder.encoder.stages.{stage_idx}.layers.{layer_idx}.layer.{i}.normalization.weight", ) ) rename_keys.append( ( - f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i+1}.norm.bias", + f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i + 1}.norm.bias", f"model.pixel_level_module.encoder.encoder.stages.{stage_idx}.layers.{layer_idx}.layer.{i}.normalization.bias", ) ) rename_keys.append( ( - f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i+1}.norm.running_mean", + f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i + 1}.norm.running_mean", f"model.pixel_level_module.encoder.encoder.stages.{stage_idx}.layers.{layer_idx}.layer.{i}.normalization.running_mean", ) ) rename_keys.append( ( - f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i+1}.norm.running_var", + f"backbone.res{stage_idx + 2}.{layer_idx}.conv{i + 1}.norm.running_var", f"model.pixel_level_module.encoder.encoder.stages.{stage_idx}.layers.{layer_idx}.layer.{i}.normalization.running_var", ) ) diff --git a/src/transformers/models/mimi/convert_mimi_checkpoint_to_pytorch.py 
b/src/transformers/models/mimi/convert_mimi_checkpoint_to_pytorch.py index c617fa036c5..75702aadd31 100644 --- a/src/transformers/models/mimi/convert_mimi_checkpoint_to_pytorch.py +++ b/src/transformers/models/mimi/convert_mimi_checkpoint_to_pytorch.py @@ -129,7 +129,7 @@ def _convert_model( hf_model.load_state_dict(state_dict, strict=True) n_params = param_count(hf_model) - logger.info(f"model loaded: {round(n_params/1e6,1)}M params") + logger.info(f"model loaded: {round(n_params / 1e6, 1)}M params") hf_model.eval() hf_model.to(device) diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index 24e29c2a21f..11fde85cf61 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -144,9 +144,9 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path): elif m_name == "kernel": array = np.transpose(array) try: - assert ( - pointer.shape == array.shape - ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + assert pointer.shape == array.shape, ( + f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + ) except AssertionError as e: e.args += (pointer.shape, array.shape) raise diff --git a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py index 7b2f53f8d77..d08642666cd 100644 --- a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py +++ b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py @@ -99,9 +99,9 @@ def get_mobilevitv2_config(task_name, orig_cfg_file): orig_config = load_orig_config_file(orig_cfg_file) assert getattr(orig_config, "model.classification.name", -1) == "mobilevit_v2", "Invalid model" config.width_multiplier = getattr(orig_config, "model.classification.mitv2.width_multiplier", 1.0) - assert ( - getattr(orig_config, "model.classification.mitv2.attn_norm_layer", -1) == "layer_norm_2d" - ), "Norm layers other than layer_norm_2d is not supported" + assert getattr(orig_config, "model.classification.mitv2.attn_norm_layer", -1) == "layer_norm_2d", ( + "Norm layers other than layer_norm_2d is not supported" + ) config.hidden_act = getattr(orig_config, "model.classification.activation.name", "swish") # config.image_size == getattr(orig_config, 'sampler.bs.crop_size_width', 256) @@ -151,7 +151,7 @@ def create_rename_keys(state_dict, base_model=False): k_new = k_new.replace("conv_1.", f"{model_prefix}conv_stem.") for i in [1, 2]: if f"layer_{i}." in k: - k_new = k_new.replace(f"layer_{i}.", f"{model_prefix}encoder.layer.{i-1}.layer.") + k_new = k_new.replace(f"layer_{i}.", f"{model_prefix}encoder.layer.{i - 1}.layer.") if ".exp_1x1." in k: k_new = k_new.replace(".exp_1x1.", ".expand_1x1.") if ".red_1x1." in k: @@ -159,11 +159,11 @@ def create_rename_keys(state_dict, base_model=False): for i in [3, 4, 5]: if f"layer_{i}.0." in k: - k_new = k_new.replace(f"layer_{i}.0.", f"{model_prefix}encoder.layer.{i-1}.downsampling_layer.") + k_new = k_new.replace(f"layer_{i}.0.", f"{model_prefix}encoder.layer.{i - 1}.downsampling_layer.") if f"layer_{i}.1.local_rep.0." in k: - k_new = k_new.replace(f"layer_{i}.1.local_rep.0.", f"{model_prefix}encoder.layer.{i-1}.conv_kxk.") + k_new = k_new.replace(f"layer_{i}.1.local_rep.0.", f"{model_prefix}encoder.layer.{i - 1}.conv_kxk.") if f"layer_{i}.1.local_rep.1." 
in k: - k_new = k_new.replace(f"layer_{i}.1.local_rep.1.", f"{model_prefix}encoder.layer.{i-1}.conv_1x1.") + k_new = k_new.replace(f"layer_{i}.1.local_rep.1.", f"{model_prefix}encoder.layer.{i - 1}.conv_1x1.") for i in [3, 4, 5]: if i == 3: @@ -176,15 +176,17 @@ def create_rename_keys(state_dict, base_model=False): for j in j_in: if f"layer_{i}.1.global_rep.{j}." in k: k_new = k_new.replace( - f"layer_{i}.1.global_rep.{j}.", f"{model_prefix}encoder.layer.{i-1}.transformer.layer.{j}." + f"layer_{i}.1.global_rep.{j}.", f"{model_prefix}encoder.layer.{i - 1}.transformer.layer.{j}." ) - if f"layer_{i}.1.global_rep.{j+1}." in k: + if f"layer_{i}.1.global_rep.{j + 1}." in k: k_new = k_new.replace( - f"layer_{i}.1.global_rep.{j+1}.", f"{model_prefix}encoder.layer.{i-1}.layernorm." + f"layer_{i}.1.global_rep.{j + 1}.", f"{model_prefix}encoder.layer.{i - 1}.layernorm." ) if f"layer_{i}.1.conv_proj." in k: - k_new = k_new.replace(f"layer_{i}.1.conv_proj.", f"{model_prefix}encoder.layer.{i-1}.conv_projection.") + k_new = k_new.replace( + f"layer_{i}.1.conv_proj.", f"{model_prefix}encoder.layer.{i - 1}.conv_projection." + ) if "pre_norm_attn.0." in k: k_new = k_new.replace("pre_norm_attn.0.", "layernorm_before.") diff --git a/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py b/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py index fa80f2b7096..39d8df0f3fa 100644 --- a/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py +++ b/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py @@ -56,7 +56,7 @@ def _read_h5_weights(group, current_key="", weights={}): def _convert_layer_names(name, gated_mlp=False): name = re.sub( r"layers\.functional(?:_(\d+))?\.layers", - lambda m: f'layers.{m.group(1) if m.group(1) else "0"}', + lambda m: f"layers.{m.group(1) if m.group(1) else '0'}", name, count=1, ) diff --git a/src/transformers/models/moshi/convert_moshi_transformers.py b/src/transformers/models/moshi/convert_moshi_transformers.py index 1caaee25ef6..55d8f77ad04 100644 --- a/src/transformers/models/moshi/convert_moshi_transformers.py +++ b/src/transformers/models/moshi/convert_moshi_transformers.py @@ -186,7 +186,7 @@ def _convert_model( hf_model.load_state_dict(state_dict, strict=True) n_params = param_count(hf_model) - logger.info(f"model loaded: {round(n_params/1e6,1)}M params") + logger.info(f"model loaded: {round(n_params / 1e6, 1)}M params") hf_model.eval() hf_model.to(device) diff --git a/src/transformers/models/mt5/modeling_mt5.py b/src/transformers/models/mt5/modeling_mt5.py index 558b6712349..30938c8251c 100644 --- a/src/transformers/models/mt5/modeling_mt5.py +++ b/src/transformers/models/mt5/modeling_mt5.py @@ -719,9 +719,9 @@ def load_tf_weights_in_mt5(model, config, tf_checkpoint_path): logger.info(f"Transposing numpy weight of shape {array.shape} for {name}") array = np.transpose(array) try: - assert ( - pointer.shape == array.shape - ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + assert pointer.shape == array.shape, ( + f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + ) except AssertionError as e: e.args += (pointer.shape, array.shape) raise diff --git a/src/transformers/models/nemotron/convert_nemotron_nemo_to_hf.py b/src/transformers/models/nemotron/convert_nemotron_nemo_to_hf.py index b9b1e9c56b0..07a5c51a3c4 100644 --- a/src/transformers/models/nemotron/convert_nemotron_nemo_to_hf.py +++ b/src/transformers/models/nemotron/convert_nemotron_nemo_to_hf.py @@ -65,13 +65,13 @@ def 
get_args(): "--hf_input_path", type=str, default=None, - help="A HF model path, " "e.g. a folder containing https://huggingface.co/nvidia/Minitron-8B-Base", + help="A HF model path, e.g. a folder containing https://huggingface.co/nvidia/Minitron-8B-Base", ) parser.add_argument( "--hf_output_path", type=str, default=None, - help="Output HF model path, " "with the same format as above but user's own weights", + help="Output HF model path, with the same format as above but user's own weights", ) parser.add_argument( "--precision", diff --git a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py index 5f98c0ca3d9..dd995bcbc6b 100644 --- a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py @@ -82,7 +82,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig remove_ignore_keys_(expert_state) expert_state = rename_fairseq_keys(expert_state, expert) save_path = os.path.join( - dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts)+1:05d}-of-???.bin") + dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin") ) torch.save(expert_state, save_path) sharded_state_dicts.append(expert_state.keys()) @@ -91,7 +91,9 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig ) # Add the last block - save_path = os.path.join(dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts)+1:05d}-of-???.bin")) + save_path = os.path.join( + dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin") + ) shared_weights = torch.load(switch_checkpoint_path + "-shared.pt")["model"] remove_ignore_keys_(shared_weights) shared_weights = rename_fairseq_keys(shared_weights, None) @@ -108,8 +110,8 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, num_experts, dtype, weig # Otherwise, let's build the index weight_map = {} for idx, shard in enumerate(sharded_state_dicts): - shard_file = weights_name.replace(".bin", f"-{idx+1:05d}-of-{len(sharded_state_dicts):05d}.bin") - temp_filename = os.path.join(dump_path, weights_name.replace(".bin", f"-{idx+1:05d}-of-???.bin")) + shard_file = weights_name.replace(".bin", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.bin") + temp_filename = os.path.join(dump_path, weights_name.replace(".bin", f"-{idx + 1:05d}-of-???.bin")) os.rename(temp_filename, os.path.join(dump_path, shard_file)) for key in shard: weight_map[key] = shard_file diff --git a/src/transformers/models/nllb_moe/modeling_nllb_moe.py b/src/transformers/models/nllb_moe/modeling_nllb_moe.py index f48c3f51775..56c97c8870c 100644 --- a/src/transformers/models/nllb_moe/modeling_nllb_moe.py +++ b/src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -1352,7 +1352,7 @@ class NllbMoeDecoder(NllbMoePreTrainedModel): if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing. Setting" " `use_cache=False`..." + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." 
) use_cache = False diff --git a/src/transformers/models/olmo2/configuration_olmo2.py b/src/transformers/models/olmo2/configuration_olmo2.py index 3d71c1b96fd..3c1f396e0f8 100644 --- a/src/transformers/models/olmo2/configuration_olmo2.py +++ b/src/transformers/models/olmo2/configuration_olmo2.py @@ -5,7 +5,6 @@ # modular_olmo2.py file directly. One of our CI enforces this. # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 - from ...configuration_utils import PretrainedConfig diff --git a/src/transformers/models/olmo2/modular_olmo2.py b/src/transformers/models/olmo2/modular_olmo2.py index bc5a9b89d50..fbd431532fd 100644 --- a/src/transformers/models/olmo2/modular_olmo2.py +++ b/src/transformers/models/olmo2/modular_olmo2.py @@ -1,7 +1,7 @@ from typing import Callable, Optional, Tuple import torch -from torch import nn +import torch.nn as nn from ...cache_utils import Cache from ...modeling_utils import ALL_ATTENTION_FUNCTIONS diff --git a/src/transformers/models/oneformer/convert_to_hf_oneformer.py b/src/transformers/models/oneformer/convert_to_hf_oneformer.py index 6e88d8a0555..960634f9f4e 100644 --- a/src/transformers/models/oneformer/convert_to_hf_oneformer.py +++ b/src/transformers/models/oneformer/convert_to_hf_oneformer.py @@ -394,11 +394,11 @@ class OriginalOneFormerCheckpointToOursConverter: [ ( f"{src_prefix}.norm{layer_idx}.weight", - f"{dst_prefix}.hidden_states_norms.stage{layer_idx+1}.weight", + f"{dst_prefix}.hidden_states_norms.stage{layer_idx + 1}.weight", ), ( f"{src_prefix}.norm{layer_idx}.bias", - f"{dst_prefix}.hidden_states_norms.stage{layer_idx+1}.bias", + f"{dst_prefix}.hidden_states_norms.stage{layer_idx + 1}.bias", ), ] ) @@ -531,11 +531,11 @@ class OriginalOneFormerCheckpointToOursConverter: [ ( f"{src_prefix}.norm{layer_idx}.weight", - f"{dst_prefix}.hidden_states_norms.stage{layer_idx+1}.weight", + f"{dst_prefix}.hidden_states_norms.stage{layer_idx + 1}.weight", ), ( f"{src_prefix}.norm{layer_idx}.bias", - f"{dst_prefix}.hidden_states_norms.stage{layer_idx+1}.bias", + f"{dst_prefix}.hidden_states_norms.stage{layer_idx + 1}.bias", ), ] ) @@ -1010,9 +1010,9 @@ def test( for original_model_feature, our_model_feature in zip( original_model_backbone_features.values(), our_model_output.encoder_hidden_states ): - assert torch.allclose( - original_model_feature, our_model_feature, atol=3e-3 - ), "The backbone features are not the same." + assert torch.allclose(original_model_feature, our_model_feature, atol=3e-3), ( + "The backbone features are not the same." + ) mask_features, _, multi_scale_features, _, _ = original_model.sem_seg_head.pixel_decoder.forward_features( original_model_backbone_features ) @@ -1025,9 +1025,9 @@ def test( for original_model_feature, our_model_feature in zip( original_pixel_decoder_features, our_model_output.pixel_decoder_hidden_states ): - assert torch.allclose( - original_model_feature, our_model_feature, atol=3e-4 - ), "The pixel decoder feature are not the same" + assert torch.allclose(original_model_feature, our_model_feature, atol=3e-4), ( + "The pixel decoder feature are not the same" + ) tr_complete = T.Compose( [ @@ -1049,9 +1049,9 @@ def test( our_segmentation = post_process_sem_seg_output(our_model_out, target_size=(640, 640))[0] - assert torch.allclose( - original_segmentation, our_segmentation, atol=1e-3 - ), "The segmentation image is not the same." + assert torch.allclose(original_segmentation, our_segmentation, atol=1e-3), ( + "The segmentation image is not the same." 
+ ) logger.info("✅ Test passed!") diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index a30ce86ee7a..3dd0a6b86b4 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -62,9 +62,9 @@ class TFAttention(keras.layers.Layer): n_state = nx # in Attention: n_state=768 (nx=n_embd) # [switch nx => n_state from Block to Attention to keep identical to TF implementation] - assert ( - n_state % config.n_head == 0 - ), f"Hidden dimension {n_state} not dividable by number of heads {config.n_head}" + assert n_state % config.n_head == 0, ( + f"Hidden dimension {n_state} not dividable by number of heads {config.n_head}" + ) self.n_head = config.n_head self.split_size = n_state self.scale = scale diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index 1dfdfbd1c21..77ec2c71927 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -173,7 +173,7 @@ def _preprocess_resize_output_shape(image, output_shape): # multichannel case: append shape of last axis output_shape = output_shape + (image.shape[-1],) elif output_ndim < image.ndim: - raise ValueError("output_shape length cannot be smaller than the " "image number of dimensions") + raise ValueError("output_shape length cannot be smaller than the image number of dimensions") return image, output_shape @@ -345,10 +345,10 @@ class Owlv2ImageProcessor(BaseImageProcessor): else: anti_aliasing_sigma = np.atleast_1d(anti_aliasing_sigma) * np.ones_like(factors) if np.any(anti_aliasing_sigma < 0): - raise ValueError("Anti-aliasing standard deviation must be " "greater than or equal to zero") + raise ValueError("Anti-aliasing standard deviation must be greater than or equal to zero") elif np.any((anti_aliasing_sigma > 0) & (factors <= 1)): warnings.warn( - "Anti-aliasing standard deviation greater than zero but " "not down-sampling along all axes" + "Anti-aliasing standard deviation greater than zero but not down-sampling along all axes" ) filtered = ndi.gaussian_filter(image, anti_aliasing_sigma, cval=cval, mode=ndi_mode) else: diff --git a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py index 8dfeff03ad2..059a7933775 100644 --- a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py +++ b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py @@ -112,17 +112,17 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { r"pretrained.blocks.(\d+).attn.qkv.(weight|bias)": r"qkv_transform_\2_\1", # Neck r"depth_head.projects.(\d+).(weight|bias)": r"neck.reassemble_stage.layers.\1.projection.\2", - r"depth_head.scratch.layer(\d+)_rn.weight": lambda m: f"neck.convs.{int(m.group(1))-1}.weight", + r"depth_head.scratch.layer(\d+)_rn.weight": lambda m: f"neck.convs.{int(m.group(1)) - 1}.weight", r"depth_head.resize_layers.(\d+).(weight|bias)": r"neck.reassemble_stage.layers.\1.resize.\2", # Refinenet (with reversed indices) - r"depth_head.scratch.refinenet(\d+).out_conv.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.projection.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit1.conv1.(weight|bias)": lambda m: 
f"neck.fusion_stage.layers.{4-int(m.group(1))}.residual_layer1.convolution1.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit1.conv2.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.residual_layer1.convolution2.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit2.conv1.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.residual_layer2.convolution1.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit2.conv2.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.residual_layer2.convolution2.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit_depth.0.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.prompt_depth_layer.convolution1.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit_depth.2.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.prompt_depth_layer.convolution2.{m.group(2)}", - r"depth_head.scratch.refinenet(\d+).resConfUnit_depth.4.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4-int(m.group(1))}.prompt_depth_layer.convolution3.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).out_conv.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.projection.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit1.conv1.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.residual_layer1.convolution1.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit1.conv2.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.residual_layer1.convolution2.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit2.conv1.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.residual_layer2.convolution1.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit2.conv2.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.residual_layer2.convolution2.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit_depth.0.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.prompt_depth_layer.convolution1.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit_depth.2.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.prompt_depth_layer.convolution2.{m.group(2)}", + r"depth_head.scratch.refinenet(\d+).resConfUnit_depth.4.(weight|bias)": lambda m: f"neck.fusion_stage.layers.{4 - int(m.group(1))}.prompt_depth_layer.convolution3.{m.group(2)}", # Head r"depth_head.scratch.output_conv1.(weight|bias)": r"head.conv1.\1", r"depth_head.scratch.output_conv2.0.(weight|bias)": r"head.conv2.\1", diff --git a/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py index 30390561169..805338511d8 100644 --- a/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py @@ -118,9 +118,9 @@ def convert_prophetnet_checkpoint_to_pytorch(prophetnet_checkpoint_path: str, py is_key_init = True break elif attribute == "position_embeddings": - assert ( - model.position_embeddings.weight.shape[-1] == old_model.embed_positions.weight.shape[-1] - ), "Hidden size has to match" + assert model.position_embeddings.weight.shape[-1] == old_model.embed_positions.weight.shape[-1], ( + "Hidden size has 
to match" + ) assert model.position_embeddings.weight.shape[0] == 512, "We want 512 position_embeddings." model.position_embeddings.weight = nn.Parameter(old_model.embed_positions.weight[:512, :]) is_key_init = True diff --git a/src/transformers/models/prophetnet/modeling_prophetnet.py b/src/transformers/models/prophetnet/modeling_prophetnet.py index fc148edbc49..c7230ddc7af 100644 --- a/src/transformers/models/prophetnet/modeling_prophetnet.py +++ b/src/transformers/models/prophetnet/modeling_prophetnet.py @@ -588,9 +588,9 @@ class ProphetNetPositionalEmbeddings(nn.Embedding): super().__init__(config.max_position_embeddings, config.hidden_size, config.pad_token_id) def forward(self, inputs_shape, device, attention_mask=None, past_key_values=None, position_ids=None): - assert (position_ids is None) or ( - self.padding_idx is None - ), "If position_ids is pre-computed then padding_idx should not be set." + assert (position_ids is None) or (self.padding_idx is None), ( + "If position_ids is pre-computed then padding_idx should not be set." + ) if position_ids is None: if past_key_values is not None: @@ -784,9 +784,9 @@ class ProphetNetNgramSelfAttention(nn.Module): self.head_dim = config.hidden_size // self.num_attn_heads self.ngram = config.ngram - assert ( - self.head_dim * self.num_attn_heads == config.hidden_size - ), "config.hidden_size must be divisible by num_attn_heads" + assert self.head_dim * self.num_attn_heads == config.hidden_size, ( + "config.hidden_size must be divisible by num_attn_heads" + ) # key, value, query projection self.key_proj = nn.Linear(config.hidden_size, config.hidden_size) self.value_proj = nn.Linear(config.hidden_size, config.hidden_size) @@ -1041,9 +1041,9 @@ class ProphetNetNgramSelfAttention(nn.Module): if predict_relative_position_buckets is None: key_sequence_length = attn_weights.shape[-1] - assert ( - position_ids[0][0] == key_sequence_length - 1 - ), "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)" + assert position_ids[0][0] == key_sequence_length - 1, ( + "`position_ids` are incorrect. They should be of the format 1 2 3 4 5 ... (key_sequence_length - 1)" + ) relative_positions = ( torch.arange(0, key_sequence_length) .unsqueeze(0) @@ -1313,9 +1313,9 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." 
+ ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: encoder_hidden_states = encoder_hidden_states + (hidden_states,) @@ -1488,9 +1488,9 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel): # prepare attention mask if past_key_values is not None: - assert ( - hidden_states.size(1) == 1 - ), "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1" + assert hidden_states.size(1) == 1, ( + "At the moment `use_cache` is only supported for `decoder_input_ids` of length 1" + ) ngram_hidden_states = [ (ngram_embeddings[ngram - 1] + predicting_stream_pos_embed).repeat(batch_size, 1, 1) diff --git a/src/transformers/models/pvt/convert_pvt_to_pytorch.py b/src/transformers/models/pvt/convert_pvt_to_pytorch.py index 73ae4c15718..99002e3d67c 100644 --- a/src/transformers/models/pvt/convert_pvt_to_pytorch.py +++ b/src/transformers/models/pvt/convert_pvt_to_pytorch.py @@ -162,7 +162,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path): elif pvt_size == "large": config_path = "Zetatech/pvt-large-224" else: - raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but " f"'{pvt_size}' was given") + raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given") config = PvtConfig(name_or_path=config_path) # load original model from https://github.com/whai362/PVT state_dict = torch.load(pvt_checkpoint, map_location="cpu") @@ -192,7 +192,7 @@ def convert_pvt_checkpoint(pvt_size, pvt_checkpoint, pytorch_dump_folder_path): elif pvt_size == "large": expected_slice_logits = torch.tensor([0.3740, -0.7739, -0.4214]) else: - raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but " f"'{pvt_size}' was given") + raise ValueError(f"Available model's size: 'tiny', 'small', 'medium', 'large', but '{pvt_size}' was given") assert torch.allclose(logits[0, :3], expected_slice_logits, atol=1e-4) diff --git a/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py b/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py index e397cb244c0..b5178cc2e99 100644 --- a/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py +++ b/src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py @@ -203,8 +203,7 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde config_path = "OpenGVLab/pvt_v2_b5" else: raise ValueError( - f"Available model sizes: 'b0', 'b1', 'b2', 'b2-linear', 'b3', 'b4', 'b5', but " - f"'{pvt_v2_size}' was given" + f"Available model sizes: 'b0', 'b1', 'b2', 'b2-linear', 'b3', 'b4', 'b5', but '{pvt_v2_size}' was given" ) config = PvtV2Config.from_pretrained(config_path) # load original model from https://github.com/whai362/PVT @@ -248,9 +247,9 @@ def convert_pvt_v2_checkpoint(pvt_v2_size, pvt_v2_checkpoint, pytorch_dump_folde f"'{pvt_v2_size}' was given" ) - assert torch.allclose( - logits[0, :3], expected_slice_logits, atol=1e-4 - ), "ImageNet weights not converted successfully." + assert torch.allclose(logits[0, :3], expected_slice_logits, atol=1e-4), ( + "ImageNet weights not converted successfully." 
+ ) print("ImageNet weights verified, conversion successful.") diff --git a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py index a013d0225ca..c5d45c9c9d1 100644 --- a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py +++ b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py @@ -623,9 +623,9 @@ class Qwen2AudioEncoder(Qwen2AudioPreTrainedModel): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 17a1f44aa1b..b6faa954549 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -494,9 +494,9 @@ class RagModel(RagPreTrainedModel): retriever: Optional[RagRetriever] = None, # or maybe just use a `set_retriever(...)` method **kwargs, ): - assert config is not None or ( - question_encoder is not None and generator is not None - ), "Either a configuration or an question_encoder and a generator has to be provided." + assert config is not None or (question_encoder is not None and generator is not None), ( + "Either a configuration or an question_encoder and a generator has to be provided." + ) if config is None: config = RagConfig.from_question_encoder_generator_configs( @@ -517,9 +517,9 @@ class RagModel(RagPreTrainedModel): self.retriever = retriever if self.retriever is not None: - assert isinstance( - retriever, RagRetriever - ), f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`" + assert isinstance(retriever, RagRetriever), ( + f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`" + ) self.retriever = retriever self.question_encoder = question_encoder @@ -660,9 +660,9 @@ class RagModel(RagPreTrainedModel): " retriever using the `set_retriever(...)` function." ) - assert ( - doc_scores is not None - ), "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." + assert doc_scores is not None, ( + "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." + ) assert (doc_scores.shape[1] % n_docs) == 0, ( f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" @@ -740,9 +740,9 @@ class RagSequenceForGeneration(RagPreTrainedModel): retriever: Optional[RagRetriever] = None, **kwargs, ): - assert config is not None or ( - question_encoder is not None and generator is not None - ), "Either a configuration or an encoder and a generator has to be provided." + assert config is not None or (question_encoder is not None and generator is not None), ( + "Either a configuration or an encoder and a generator has to be provided." 
+ ) if config is None: config = RagConfig.from_question_encoder_generator_configs( @@ -973,9 +973,9 @@ class RagSequenceForGeneration(RagPreTrainedModel): ) num_beams = num_beams if num_beams is not None else self.config.num_beams - assert ( - input_ids is not None or context_input_ids is not None - ), " At least one of input_ids or context_input_ids must be given" + assert input_ids is not None or context_input_ids is not None, ( + " At least one of input_ids or context_input_ids must be given" + ) if self.retriever is not None and context_input_ids is None: question_hidden_states = self.question_encoder(input_ids, attention_mask=attention_mask)[0] @@ -1138,9 +1138,9 @@ class RagTokenForGeneration(RagPreTrainedModel): retriever: Optional[RagRetriever] = None, **kwargs, ): - assert config is not None or ( - question_encoder is not None and generator is not None - ), "Either a configuration or an encoder and a generator has to be provided." + assert config is not None or (question_encoder is not None and generator is not None), ( + "Either a configuration or an encoder and a generator has to be provided." + ) if config is None: config = RagConfig.from_question_encoder_generator_configs( diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 6714ac61a3b..babc8396109 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -506,9 +506,9 @@ class TFRagModel(TFRagPreTrainedModel): load_weight_prefix: Optional[str] = None, **kwargs, ): - assert config is not None or ( - question_encoder is not None and generator is not None - ), "Either a configuration or an question_encoder and a generator has to be provided." + assert config is not None or (question_encoder is not None and generator is not None), ( + "Either a configuration or an question_encoder and a generator has to be provided." + ) if config is None: config = RagConfig.from_question_encoder_generator_configs( @@ -533,9 +533,9 @@ class TFRagModel(TFRagPreTrainedModel): self.retriever = retriever if self.retriever is not None: - assert isinstance( - retriever, RagRetriever - ), f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`" + assert isinstance(retriever, RagRetriever), ( + f"`self.retriever` is of type {type(self.retriever)}, but should be of type `RagRetriever`" + ) self.retriever = retriever self.question_encoder = question_encoder @@ -589,9 +589,9 @@ class TFRagModel(TFRagPreTrainedModel): >>> input_ids = input_dict["input_ids"] >>> outputs = model(input_ids) ```""" - assert ( - "decoder_cached_states" not in kwargs - ), "Please use past_key_values to cache intermediate outputs" # from modeling_tf_bart.py + assert "decoder_cached_states" not in kwargs, ( + "Please use past_key_values to cache intermediate outputs" + ) # from modeling_tf_bart.py # aliasing to minimize code changing n_docs = n_docs if n_docs is not None else self.config.n_docs @@ -657,9 +657,9 @@ class TFRagModel(TFRagPreTrainedModel): " retriever using the `set_retriever(...)` function." ) - assert ( - doc_scores is not None - ), "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." + assert doc_scores is not None, ( + "Make sure that `doc_scores` are passed when passing `encoder_outputs` to the forward function." 
+ ) assert (doc_scores.shape[1] % n_docs) == 0, ( f" The first dimension of `context_input_ids` should be a multiple of `n_docs`={n_docs}, but is" @@ -747,9 +747,9 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss retriever: Optional[RagRetriever] = None, **kwargs, ): - assert config is not None or ( - question_encoder is not None and generator is not None - ), "Either a configuration or an encoder and a generator has to be provided." + assert config is not None or (question_encoder is not None and generator is not None), ( + "Either a configuration or an encoder and a generator has to be provided." + ) if config is None: config = RagConfig.from_question_encoder_generator_configs( @@ -939,9 +939,9 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True) ```""" - assert ( - "decoder_cached_states" not in kwargs - ), "Please use past_key_values to cache intermediate outputs" # from modeling_tf_bart.py + assert "decoder_cached_states" not in kwargs, ( + "Please use past_key_values to cache intermediate outputs" + ) # from modeling_tf_bart.py do_marginalize = do_marginalize if do_marginalize else self.config.do_marginalize reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss @@ -1327,9 +1327,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL retriever: Optional[RagRetriever] = None, **kwargs, ): - assert config is not None or ( - question_encoder is not None and generator is not None - ), "Either a configuration or an encoder and a generator has to be provided." + assert config is not None or (question_encoder is not None and generator is not None), ( + "Either a configuration or an encoder and a generator has to be provided." 
+ ) if config is None: config = RagConfig.from_question_encoder_generator_configs( @@ -1454,9 +1454,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True) ```""" - assert ( - "decoder_cached_states" not in kwargs - ), "Please use past_key_values to cache intermediate outputs" # from modeling_tf_bart.py + assert "decoder_cached_states" not in kwargs, ( + "Please use past_key_values to cache intermediate outputs" + ) # from modeling_tf_bart.py exclude_bos_score = exclude_bos_score if exclude_bos_score else self.config.exclude_bos_score reduce_loss = reduce_loss if reduce_loss else self.config.reduce_loss @@ -1663,9 +1663,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL ) num_beams = num_beams if num_beams is not None else self.config.num_beams - assert ( - input_ids is not None or context_input_ids is not None - ), " At least one of input_ids or context_input_ids must be given" + assert input_ids is not None or context_input_ids is not None, ( + " At least one of input_ids or context_input_ids must be given" + ) if self.retriever is not None and context_input_ids is None: question_hidden_states = self.question_encoder(input_ids, attention_mask=attention_mask)[0] diff --git a/src/transformers/models/rag/retrieval_rag.py b/src/transformers/models/rag/retrieval_rag.py index f4000aa6e7f..c7a592a6441 100644 --- a/src/transformers/models/rag/retrieval_rag.py +++ b/src/transformers/models/rag/retrieval_rag.py @@ -156,9 +156,9 @@ class LegacyIndex(Index): ) with open(resolved_meta_path, "rb") as metadata_file: self.index_id_to_db_id = pickle.load(metadata_file) - assert ( - len(self.index_id_to_db_id) == self.index.ntotal - ), "Deserialized index_id_to_db_id should match faiss index size" + assert len(self.index_id_to_db_id) == self.index.ntotal, ( + "Deserialized index_id_to_db_id should match faiss index size" + ) def is_initialized(self): return self._index_initialized diff --git a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py index 7e287a47bfe..55cad3c8bae 100755 --- a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py +++ b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py @@ -150,15 +150,15 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size): position_embeddings = torch_model_reformer.embeddings.position_embeddings for emb_idx in range(len(position_embeddings.weights)): emb_weights = np.asarray(weights[3][emb_idx][0]) - assert ( - position_embeddings.weights[emb_idx].shape == emb_weights.shape - ), f"{position_embeddings[emb_idx]} emb does not match" + assert position_embeddings.weights[emb_idx].shape == emb_weights.shape, ( + f"{position_embeddings[emb_idx]} emb does not match" + ) position_embeddings.weights[emb_idx] = nn.Parameter(torch.tensor(emb_weights)) trax_layer_weights = weights[5] - assert len(torch_model_reformer.encoder.layers) * 4 == len( - trax_layer_weights - ), "HF and trax model do not have the same number of layers" + assert len(torch_model_reformer.encoder.layers) * 4 == len(trax_layer_weights), ( + "HF and trax model do not have the same number of layers" + ) for layer_idx, layer in enumerate(torch_model_reformer.encoder.layers): block_weights = trax_layer_weights[4 * layer_idx : 4 * (layer_idx + 1)] 
set_block_weights_in_torch(block_weights, layer, hidden_size) diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index ab6c15f5174..0fe930bd813 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -446,12 +446,12 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): # free memory del hidden_states - assert ( - query_key_vectors.shape[-1] == self.attention_head_size - ), f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}." - assert ( - value_vectors.shape[-1] == self.attention_head_size - ), f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." + assert query_key_vectors.shape[-1] == self.attention_head_size, ( + f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}." + ) + assert value_vectors.shape[-1] == self.attention_head_size, ( + f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." + ) do_standard_self_attention = (sequence_length <= self.chunk_length) or ( use_cache and past_buckets_states[1] is not None @@ -470,9 +470,9 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): # make sure buckets has correct shape for LSH attention buckets = buckets.view(batch_size, self.num_attention_heads, num_hashes * sequence_length) - assert ( - int(buckets.shape[-1]) == num_hashes * sequence_length - ), f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}" + assert int(buckets.shape[-1]) == num_hashes * sequence_length, ( + f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}" + ) sorted_bucket_idx, undo_sorted_bucket_idx = self._get_sorted_bucket_idx_and_undo_sorted_bucket_idx( sequence_length, buckets, num_hashes @@ -612,18 +612,18 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): # We sample a different random rotation for each round of hashing to # decrease the probability of hash misses. if isinstance(self.num_buckets, int): - assert ( - self.num_buckets % 2 == 0 - ), f"There should be an even number of buckets, but `self.num_buckets`: {self.num_buckets}" + assert self.num_buckets % 2 == 0, ( + f"There should be an even number of buckets, but `self.num_buckets`: {self.num_buckets}" + ) rotation_size = self.num_buckets num_buckets = self.num_buckets else: # Factorize the hash if self.num_buckets is a list or tuple rotation_size, num_buckets = 0, 1 for bucket_factor in self.num_buckets: - assert ( - bucket_factor % 2 == 0 - ), f"The number of buckets should be even, but `num_bucket`: {bucket_factor}" + assert bucket_factor % 2 == 0, ( + f"The number of buckets should be even, but `num_bucket`: {bucket_factor}" + ) rotation_size = rotation_size + bucket_factor num_buckets = num_buckets * bucket_factor @@ -1090,15 +1090,15 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin): key_vectors = self._split_hidden_size_dim(key_vectors, self.num_attention_heads, self.attention_head_size) value_vectors = self._split_hidden_size_dim(value_vectors, self.num_attention_heads, self.attention_head_size) - assert ( - query_vectors.shape[-1] == self.attention_head_size - ), f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}." 
- assert ( - key_vectors.shape[-1] == self.attention_head_size - ), f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}." - assert ( - value_vectors.shape[-1] == self.attention_head_size - ), f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." + assert query_vectors.shape[-1] == self.attention_head_size, ( + f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}." + ) + assert key_vectors.shape[-1] == self.attention_head_size, ( + f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}." + ) + assert value_vectors.shape[-1] == self.attention_head_size, ( + f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." + ) if self.chunk_length is None: assert self.num_chunks_before == 0 and self.num_chunks_after == 0, ( @@ -1976,9 +1976,9 @@ class ReformerModel(ReformerPreTrainedModel): def __init__(self, config): super().__init__(config) self.config = config - assert ( - self.config.num_hidden_layers > 0 - ), "`config.attn_layers` is empty. Select at least one attn layer form ['lsh', 'local']" + assert self.config.num_hidden_layers > 0, ( + "`config.attn_layers` is empty. Select at least one attn layer form ['lsh', 'local']" + ) self.embeddings = ReformerEmbeddings(config) self.encoder = ReformerEncoder(config) @@ -2039,9 +2039,9 @@ class ReformerModel(ReformerPreTrainedModel): else: raise ValueError("You have to specify either input_ids or inputs_embeds") - assert ( - len(input_shape) == 2 - ), f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}" + assert len(input_shape) == 2, ( + f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}" + ) if past_buckets_states is not None: assert not self.training, "`past_buckets_states` can only be used for inference, not for training`." 
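The hunks above, and most of those that follow, apply one mechanical rewrite: the updated ruff formatter keeps an `assert` condition on a single line and parenthesizes only the failure message, where the previous layout parenthesized and split the condition itself. A minimal sketch of the two layouts, using hypothetical stand-ins (`vectors`, `head_size`) rather than the real tensors from these files:

# Hypothetical stand-ins for the tensors and config values in the hunks above.
vectors = [0.1, 0.2, 0.3]
head_size = 3

# Old layout: the condition is wrapped in parentheses and split across lines.
assert (
    len(vectors) == head_size
), f"last dim of vectors is {len(vectors)} but should be {head_size}."

# New layout: the condition stays on one line and only the message is wrapped.
# Runtime behavior is identical; the change is purely textual.
assert len(vectors) == head_size, (
    f"last dim of vectors is {len(vectors)} but should be {head_size}."
)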
diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py index 049c71ae6c0..7e0cc5d562b 100644 --- a/src/transformers/models/regnet/modeling_tf_regnet.py +++ b/src/transformers/models/regnet/modeling_tf_regnet.py @@ -311,7 +311,7 @@ class TFRegNetStage(keras.layers.Layer): self.layers = [ # downsampling is done in the first layer with stride of 2 layer(config, in_channels, out_channels, stride=stride, name="layers.0"), - *[layer(config, out_channels, out_channels, name=f"layers.{i+1}") for i in range(depth - 1)], + *[layer(config, out_channels, out_channels, name=f"layers.{i + 1}") for i in range(depth - 1)], ] def call(self, hidden_state): @@ -346,7 +346,7 @@ class TFRegNetEncoder(keras.layers.Layer): ) in_out_channels = zip(config.hidden_sizes, config.hidden_sizes[1:]) for i, ((in_channels, out_channels), depth) in enumerate(zip(in_out_channels, config.depths[1:])): - self.stages.append(TFRegNetStage(config, in_channels, out_channels, depth=depth, name=f"stages.{i+1}")) + self.stages.append(TFRegNetStage(config, in_channels, out_channels, depth=depth, name=f"stages.{i + 1}")) def call( self, hidden_state: tf.Tensor, output_hidden_states: bool = False, return_dict: bool = True diff --git a/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py b/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py index b321af02e73..7bef416ec37 100644 --- a/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py +++ b/src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py @@ -206,7 +206,7 @@ def _convert_model( hf_model.load_state_dict(state_dict, strict=False) n_params = param_count(hf_model) - logger.info(f"model loaded: {round(n_params/1e6,1)}M params") + logger.info(f"model loaded: {round(n_params / 1e6, 1)}M params") hf_model.eval() hf_model.to(device) diff --git a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py index 04dc8ed7d10..5cbdf0960d3 100755 --- a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py @@ -2869,7 +2869,7 @@ class SeamlessM4TForTextToText(SeamlessM4TPreTrainedModel, GenerationMixin): if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in - {', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" + {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" ) # tgt_lang gets priority over decoder input ids text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) @@ -3140,7 +3140,7 @@ class SeamlessM4TForSpeechToText(SeamlessM4TPreTrainedModel, GenerationMixin): if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in - {', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" + {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" ) # tgt_lang gets priority over decoder input ids text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) @@ -3407,7 +3407,7 @@ class SeamlessM4TForTextToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin): elif tgt_lang not in lang_code_to_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. 
- Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4T supports + Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4T supports more languages for text translation than for speech synthesis.""" ) @@ -3736,7 +3736,7 @@ class SeamlessM4TForSpeechToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin): elif tgt_lang not in lang_code_to_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. - Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4T supports + Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4T supports more languages for text translation than for speech synthesis.""" ) @@ -4151,7 +4151,7 @@ class SeamlessM4TModel(SeamlessM4TPreTrainedModel, GenerationMixin): elif tgt_lang not in lang_code_to_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. - Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4T supports + Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4T supports more languages for text translation than for speech synthesis.""" ) diff --git a/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py b/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py index 97a633d05ac..c75b7c8139d 100644 --- a/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py +++ b/src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py @@ -207,7 +207,7 @@ def _convert_model( hf_model.load_state_dict(state_dict, strict=False) n_params = param_count(hf_model) - logger.info(f"model loaded: {round(n_params/1e6,1)}M params") + logger.info(f"model loaded: {round(n_params / 1e6, 1)}M params") hf_model.eval() hf_model.to(device) diff --git a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py index 3bf8edd2a68..1b48297a6f1 100644 --- a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py @@ -3149,7 +3149,7 @@ class SeamlessM4Tv2ForTextToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin): if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in - {', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" + {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" ) # tgt_lang gets priority over decoder input ids text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) @@ -3430,7 +3430,7 @@ class SeamlessM4Tv2ForSpeechToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin if tgt_lang not in self.generation_config.text_decoder_lang_to_code_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. Please specify a `tgt_lang` in - {', '.join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" + {", ".join(self.generation_config.text_decoder_lang_to_code_id.keys())}""" ) # tgt_lang gets priority over decoder input ids text_tgt_lang_id = self.generation_config.text_decoder_lang_to_code_id.get(tgt_lang) @@ -3707,7 +3707,7 @@ class SeamlessM4Tv2ForTextToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMixin elif tgt_lang not in lang_code_to_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. 
- Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports + Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports more languages for text translation than for speech synthesis.""" ) @@ -4078,7 +4078,7 @@ class SeamlessM4Tv2ForSpeechToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMix elif tgt_lang not in lang_code_to_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. - Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports + Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports more languages for text translation than for speech synthesis.""" ) @@ -4539,7 +4539,7 @@ class SeamlessM4Tv2Model(SeamlessM4Tv2PreTrainedModel, GenerationMixin): elif tgt_lang not in lang_code_to_id: raise ValueError( f"""`tgt_lang={tgt_lang}` is not supported by this model. - Please specify a `tgt_lang` in {','.join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports + Please specify a `tgt_lang` in {",".join(lang_code_to_id.keys())}. Note that SeamlessM4Tv2 supports more languages for text translation than for speech synthesis.""" ) diff --git a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py index dbac5ab6b89..3bbc86e433b 100644 --- a/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py +++ b/src/transformers/models/segformer/convert_segformer_original_to_pytorch.py @@ -47,13 +47,13 @@ def rename_keys(state_dict, encoder_only=False): if "patch_embed" in key: # replace for example patch_embed1 by patch_embeddings.0 idx = key[key.find("patch_embed") + len("patch_embed")] - key = key.replace(f"patch_embed{idx}", f"patch_embeddings.{int(idx)-1}") + key = key.replace(f"patch_embed{idx}", f"patch_embeddings.{int(idx) - 1}") if "norm" in key: key = key.replace("norm", "layer_norm") if "segformer.encoder.layer_norm" in key: # replace for example layer_norm1 by layer_norm.0 idx = key[key.find("segformer.encoder.layer_norm") + len("segformer.encoder.layer_norm")] - key = key.replace(f"layer_norm{idx}", f"layer_norm.{int(idx)-1}") + key = key.replace(f"layer_norm{idx}", f"layer_norm.{int(idx) - 1}") if "layer_norm1" in key: key = key.replace("layer_norm1", "layer_norm_1") if "layer_norm2" in key: @@ -61,7 +61,7 @@ def rename_keys(state_dict, encoder_only=False): if "block" in key: # replace for example block1 by block.0 idx = key[key.find("block") + len("block")] - key = key.replace(f"block{idx}", f"block.{int(idx)-1}") + key = key.replace(f"block{idx}", f"block.{int(idx) - 1}") if "attn.q" in key: key = key.replace("attn.q", "attention.self.query") if "attn.proj" in key: @@ -80,7 +80,7 @@ def rename_keys(state_dict, encoder_only=False): if "linear_c" in key: # replace for example linear_c4 by linear_c.3 idx = key[key.find("linear_c") + len("linear_c")] - key = key.replace(f"linear_c{idx}", f"linear_c.{int(idx)-1}") + key = key.replace(f"linear_c{idx}", f"linear_c.{int(idx) - 1}") if key.startswith("head"): key = key.replace("head", "classifier") new_state_dict[key] = value diff --git a/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py b/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py index 874aa2e066f..bdd57c84f54 100644 --- 
a/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py +++ b/src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py @@ -192,41 +192,41 @@ def load_adapter(full_name, value, adapter, unused_weights): if "proj_ln" in full_name: # has to be layer norm if "bias" in name: - assert ( - value.shape == adapter.proj_layer_norm.bias.data.shape - ), f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.bias.data.shape} was found." + assert value.shape == adapter.proj_layer_norm.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.bias.data.shape} was found." + ) adapter.proj_layer_norm.bias.data = value logger.info(f"Adapter proj layer norm bias was initialized from {full_name}.") if "weight" in name: - assert ( - value.shape == adapter.proj_layer_norm.weight.data.shape - ), f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.weight.data.shape} was found." + assert value.shape == adapter.proj_layer_norm.weight.data.shape, ( + f"{full_name} has size {value.shape}, but {adapter.proj_layer_norm.weight.data.shape} was found." + ) adapter.proj_layer_norm.weight.data = value else: # has to be projection layer if "bias" in name: - assert ( - value.shape == adapter.proj.bias.data.shape - ), f"{full_name} has size {value.shape}, but {adapter.proj.bias.data.shape} was found." + assert value.shape == adapter.proj.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {adapter.proj.bias.data.shape} was found." + ) adapter.proj.bias.data = value logger.info(f"Adapter proj layer bias was initialized from {full_name}.") if "weight" in name: - assert ( - value.shape == adapter.proj.weight.data.shape - ), f"{full_name} has size {value.shape}, but {adapter.proj.weight.data.shape} was found." + assert value.shape == adapter.proj.weight.data.shape, ( + f"{full_name} has size {value.shape}, but {adapter.proj.weight.data.shape} was found." + ) adapter.proj.weight.data = value logger.info(f"Adapter proj layer weight was initialized from {full_name}.") elif isinstance(layer_id, int): if "bias" in name: - assert ( - value.shape == adapter.layers[layer_id].conv.bias.data.shape - ), f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.bias.data.shape} was found." + assert value.shape == adapter.layers[layer_id].conv.bias.data.shape, ( + f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.bias.data.shape} was found." + ) adapter.layers[layer_id].conv.bias.data = value logger.info(f"Adapter layer {layer_id} bias was initialized from {full_name}.") elif "weight" in name: - assert ( - value.shape == adapter.layers[layer_id].conv.weight.data.shape - ), f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.weight.data.shape} was found." + assert value.shape == adapter.layers[layer_id].conv.weight.data.shape, ( + f"{full_name} has size {value.shape}, but {adapter.layers[layer_id].conv.weight.data.shape} was found." 
+ ) adapter.layers[layer_id].conv.weight.data = value logger.info(f"Adapter layer {layer_id} bias was initialized from {full_name}.") else: diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index 2e4f64c5120..ccff216c98c 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -774,9 +774,9 @@ class Speech2TextEncoder(Speech2TextPreTrainedModel): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: diff --git a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py index f0531283395..29fe2e3e25d 100644 --- a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py +++ b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py @@ -224,9 +224,9 @@ def convert_swin2sr_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to [[-0.5238, -0.5557, -0.6321], [-0.6016, -0.5903, -0.6391], [-0.6244, -0.6334, -0.6889]] ) - assert ( - outputs.reconstruction.shape == expected_shape - ), f"Shape of reconstruction should be {expected_shape}, but is {outputs.reconstruction.shape}" + assert outputs.reconstruction.shape == expected_shape, ( + f"Shape of reconstruction should be {expected_shape}, but is {outputs.reconstruction.shape}" + ) assert torch.allclose(outputs.reconstruction[0, 0, :3, :3], expected_slice, atol=1e-3) print("Looks ok!") diff --git a/src/transformers/models/switch_transformers/convert_big_switch.py b/src/transformers/models/switch_transformers/convert_big_switch.py index e4b8af07cd4..70652c10cf1 100644 --- a/src/transformers/models/switch_transformers/convert_big_switch.py +++ b/src/transformers/models/switch_transformers/convert_big_switch.py @@ -103,7 +103,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, max_shard_size, dtype, w # If this weight is going to tip up over the maximal size, we split. 
if current_block_size + weight_size > max_shard_size: save_path = os.path.join( - dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts)+1:05d}-of-???.bin") + dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin") ) rename_and_save_block(current_block, save_path) sharded_state_dicts.append(current_block.keys()) @@ -116,7 +116,9 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, max_shard_size, dtype, w total_size += weight_size # Add the last block - save_path = os.path.join(dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts)+1:05d}-of-???.bin")) + save_path = os.path.join( + dump_path, weights_name.replace(".bin", f"-{len(sharded_state_dicts) + 1:05d}-of-???.bin") + ) rename_and_save_block(current_block, save_path) sharded_state_dicts.append(current_block.keys()) @@ -129,9 +131,9 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, max_shard_size, dtype, w shards = {} for idx, shard in enumerate(sharded_state_dicts): shard_file = weights_name.replace( - ".bin", f"-{idx+1:05d}-of-{len(sharded_state_dicts):05d}.bin" + ".bin", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.bin" ) # len(sharded_state_dicts):05d} - temp_filename = os.path.join(dump_path, weights_name.replace(".bin", f"-{idx+1:05d}-of-???.bin")) + temp_filename = os.path.join(dump_path, weights_name.replace(".bin", f"-{idx + 1:05d}-of-???.bin")) os.rename(temp_filename, os.path.join(dump_path, shard_file)) shards[shard_file] = shard for key in shard: diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index f7d2b23f8b1..84f5b2a6361 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -363,9 +363,9 @@ class TFT5Attention(keras.layers.Layer): real_seq_length = seq_length if past_key_value is not None: - assert ( - len(past_key_value) == 2 - ), f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" + assert len(past_key_value) == 2, ( + f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" + ) real_seq_length += shape_list(past_key_value[0])[2] if query_length is None else query_length key_length = real_seq_length if key_value_states is None else shape_list(key_value_states)[1] diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py index cf22fe242f5..53d9a9d6bae 100644 --- a/src/transformers/models/tapas/modeling_tapas.py +++ b/src/transformers/models/tapas/modeling_tapas.py @@ -1284,9 +1284,9 @@ class TapasForQuestionAnswering(TapasPreTrainedModel): aggregate_mask = None else: if float_answer is not None: - assert ( - labels.shape[0] == float_answer.shape[0] - ), "Make sure the answers are a FloatTensor of shape (batch_size,)" + assert labels.shape[0] == float_answer.shape[0], ( + "Make sure the answers are a FloatTensor of shape (batch_size,)" + ) # [batch_size] aggregate_mask = _calculate_aggregate_mask( float_answer, @@ -1336,9 +1336,9 @@ class TapasForQuestionAnswering(TapasPreTrainedModel): if is_supervised: # Note that `aggregate_mask` is None if the setting is supervised. 
if aggregation_labels is not None: - assert ( - labels.shape[0] == aggregation_labels.shape[0] - ), "Make sure the aggregation labels are a LongTensor of shape (batch_size,)" + assert labels.shape[0] == aggregation_labels.shape[0], ( + "Make sure the aggregation labels are a LongTensor of shape (batch_size,)" + ) per_example_additional_loss = _calculate_aggregation_loss( logits_aggregation, aggregate_mask, diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index b73c3e93b9b..82430deebfe 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -1562,9 +1562,9 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel): aggregate_mask = None else: if float_answer is not None: - assert ( - shape_list(labels)[0] == shape_list(float_answer)[0] - ), "Make sure the answers are a FloatTensor of shape (batch_size,)" + assert shape_list(labels)[0] == shape_list(float_answer)[0], ( + "Make sure the answers are a FloatTensor of shape (batch_size,)" + ) # [batch_size] aggregate_mask = _calculate_aggregate_mask( float_answer, @@ -1615,9 +1615,9 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel): if is_supervised: # Note that `aggregate_mask` is None if the setting is supervised. if aggregation_labels is not None: - assert ( - shape_list(labels)[0] == shape_list(aggregation_labels)[0] - ), "Make sure the aggregation labels are a LongTensor of shape (batch_size,)" + assert shape_list(labels)[0] == shape_list(aggregation_labels)[0], ( + "Make sure the aggregation labels are a LongTensor of shape (batch_size,)" + ) per_example_additional_loss = _calculate_aggregation_loss( logits_aggregation, aggregate_mask, diff --git a/src/transformers/models/tvp/modeling_tvp.py b/src/transformers/models/tvp/modeling_tvp.py index 1d6d9a0106d..16dd5c0a375 100644 --- a/src/transformers/models/tvp/modeling_tvp.py +++ b/src/transformers/models/tvp/modeling_tvp.py @@ -773,7 +773,7 @@ TVP_PROMPTER_CLASSES_MAPPING = { @add_start_docstrings( - "The bare Tvp Model transformer outputting BaseModelOutputWithPooling object without any specific head on" " top.", + "The bare Tvp Model transformer outputting BaseModelOutputWithPooling object without any specific head on top.", TVP_START_DOCSTRING, ) class TvpModel(TvpPreTrainedModel): diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py index 089434ca827..93d128562e4 100644 --- a/src/transformers/models/udop/modeling_udop.py +++ b/src/transformers/models/udop/modeling_udop.py @@ -407,8 +407,7 @@ class UdopPatchEmbeddings(nn.Module): batch_size, num_channels, height, width = pixel_values.shape if height != self.image_size[0] or width != self.image_size[1]: raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" - f" ({self.image_size[0]}*{self.image_size[1]})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." 
) embeddings = self.proj(pixel_values) embeddings = embeddings.flatten(2).transpose(1, 2) diff --git a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py index dd03623c880..59733539415 100644 --- a/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py @@ -84,9 +84,9 @@ def convert_visual_bert_checkpoint(checkpoint_path, pytorch_dump_folder_path): Copy/paste/tweak model's weights to our VisualBERT structure. """ - assert ( - checkpoint_path.split("/")[-1] in ACCEPTABLE_CHECKPOINTS - ), f"The checkpoint provided must be in {ACCEPTABLE_CHECKPOINTS}." + assert checkpoint_path.split("/")[-1] in ACCEPTABLE_CHECKPOINTS, ( + f"The checkpoint provided must be in {ACCEPTABLE_CHECKPOINTS}." + ) # Get Config if "pre" in checkpoint_path: diff --git a/src/transformers/models/vitpose/convert_vitpose_to_hf.py b/src/transformers/models/vitpose/convert_vitpose_to_hf.py index b1e55628a31..0d36e332a4f 100644 --- a/src/transformers/models/vitpose/convert_vitpose_to_hf.py +++ b/src/transformers/models/vitpose/convert_vitpose_to_hf.py @@ -229,7 +229,7 @@ def write_model(model_name, model_path, push_to_hub, check_logits=True): elif re.search("head", new_key) and not config.use_simple_decoder: # Pattern for deconvolution layers deconv_pattern = r"deconv_layers\.(0|3)\.weight" - new_key = re.sub(deconv_pattern, lambda m: f"deconv{int(m.group(1))//3 + 1}.weight", new_key) + new_key = re.sub(deconv_pattern, lambda m: f"deconv{int(m.group(1)) // 3 + 1}.weight", new_key) # Pattern for batch normalization layers bn_patterns = [ (r"deconv_layers\.(\d+)\.weight", r"batchnorm\1.weight"), diff --git a/src/transformers/models/vivit/modeling_vivit.py b/src/transformers/models/vivit/modeling_vivit.py index bd6ce5234f0..238a723dfab 100755 --- a/src/transformers/models/vivit/modeling_vivit.py +++ b/src/transformers/models/vivit/modeling_vivit.py @@ -72,8 +72,7 @@ class VivitTubeletEmbeddings(nn.Module): batch_size, num_frames, num_channels, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Image image size ({height}*{width}) doesn't match model" - f" ({self.image_size[0]}*{self.image_size[1]})." + f"Image image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})." 
) # permute to (batch_size, num_channels, num_frames, height, width) diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index 3b118162d7c..1cfbeb43a5e 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -614,7 +614,7 @@ class TFWav2Vec2FeatureEncoder(keras.layers.Layer): if config.feat_extract_norm == "group": conv_layers = [TFWav2Vec2GroupNormConvLayer(config, layer_id=0, name=f"conv_layers.{0}")] + [ - TFWav2Vec2NoLayerNormConvLayer(config, layer_id=i + 1, name=f"conv_layers.{i+1}") + TFWav2Vec2NoLayerNormConvLayer(config, layer_id=i + 1, name=f"conv_layers.{i + 1}") for i in range(config.num_feat_extract_layers - 1) ] elif config.feat_extract_norm == "layer": diff --git a/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py b/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py index adead75bf5d..33510654dcc 100644 --- a/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py +++ b/src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py @@ -113,7 +113,7 @@ def _convert_model( hf_model.load_state_dict(state_dict, strict=True) n_params = param_count(hf_model) - logger.info(f"model loaded: {round(n_params/1e6,1)}M params") + logger.info(f"model loaded: {round(n_params / 1e6, 1)}M params") hf_model.eval() del state_dict diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index dacd147c151..727a68f8571 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -1043,9 +1043,9 @@ class WhisperEncoder(WhisperPreTrainedModel): # check if head_mask has a correct number of layers specified if desired if head_mask is not None: - assert head_mask.size()[0] == ( - len(self.layers) - ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + assert head_mask.size()[0] == (len(self.layers)), ( + f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + ) for idx, encoder_layer in enumerate(self.layers): if output_hidden_states: diff --git a/src/transformers/models/x_clip/modeling_x_clip.py b/src/transformers/models/x_clip/modeling_x_clip.py index 4cf2af1da5c..4f1aa6e3223 100644 --- a/src/transformers/models/x_clip/modeling_x_clip.py +++ b/src/transformers/models/x_clip/modeling_x_clip.py @@ -167,7 +167,7 @@ class XCLIPVisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." 
) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/src/transformers/models/xglm/modeling_xglm.py b/src/transformers/models/xglm/modeling_xglm.py index 0ca0fa77fc5..9d1adf73701 100755 --- a/src/transformers/models/xglm/modeling_xglm.py +++ b/src/transformers/models/xglm/modeling_xglm.py @@ -601,8 +601,7 @@ class XGLMModel(XGLMPreTrainedModel): if self.gradient_checkpointing and self.training: if use_cache: logger.warning_once( - "`use_cache = True` is incompatible with gradient checkpointing`. Setting `use_cache =" - " False`..." + "`use_cache = True` is incompatible with gradient checkpointing`. Setting `use_cache = False`..." ) use_cache = False diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index 91f2d09f96f..f689e417bfe 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -164,15 +164,15 @@ def load_tf_weights_in_xlnet(model, config, tf_path): array = np.transpose(array) if isinstance(pointer, list): # Here we will split the TF weights - assert ( - len(pointer) == array.shape[0] - ), f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched" + assert len(pointer) == array.shape[0], ( + f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched" + ) for i, p_i in enumerate(pointer): arr_i = array[i, ...] try: - assert ( - p_i.shape == arr_i.shape - ), f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched" + assert p_i.shape == arr_i.shape, ( + f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched" + ) except AssertionError as e: e.args += (p_i.shape, arr_i.shape) raise @@ -180,9 +180,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path): p_i.data = torch.from_numpy(arr_i) else: try: - assert ( - pointer.shape == array.shape - ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + assert pointer.shape == array.shape, ( + f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" + ) except AssertionError as e: e.args += (pointer.shape, array.shape) raise diff --git a/src/transformers/models/zamba/configuration_zamba.py b/src/transformers/models/zamba/configuration_zamba.py index df165154a00..46d99a32272 100644 --- a/src/transformers/models/zamba/configuration_zamba.py +++ b/src/transformers/models/zamba/configuration_zamba.py @@ -203,9 +203,9 @@ class ZambaConfig(PretrainedConfig): self.layers_block_type = self._layers_block_type(num_hidden_layers, attn_layer_period, attn_layer_offset) - assert ( - self.mamba_expand * self.hidden_size - ) % self.n_mamba_heads == 0, "`intermediate_size` should be divisible by `n_mamba_heads`." + assert (self.mamba_expand * self.hidden_size) % self.n_mamba_heads == 0, ( + "`intermediate_size` should be divisible by `n_mamba_heads`." 
+ ) super().__init__( pad_token_id=pad_token_id, diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index 804dc4af075..899a7cc5390 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -339,7 +339,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): ) if self.model_type == ModelType.VisionEncoderDecoder: - task_prompt = f'{input["question"]}' + task_prompt = f"{input['question']}" # Adapted from https://huggingface.co/spaces/nielsr/donut-docvqa/blob/main/app.py encoding = { "inputs": image_features["pixel_values"], diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py index 9bc75445502..0adefdffb9f 100644 --- a/src/transformers/pipelines/text2text_generation.py +++ b/src/transformers/pipelines/text2text_generation.py @@ -290,7 +290,7 @@ class SummarizationPipeline(Text2TextGenerationPipeline): logger.warning( f"Your max_length is set to {max_length}, but your input_length is only {input_length}. Since this is " "a summarization task, where outputs shorter than the input are typically wanted, you might " - f"consider decreasing max_length manually, e.g. summarizer('...', max_length={input_length//2})" + f"consider decreasing max_length manually, e.g. summarizer('...', max_length={input_length // 2})" ) diff --git a/src/transformers/quantizers/quantizer_fbgemm_fp8.py b/src/transformers/quantizers/quantizer_fbgemm_fp8.py index dd0927765d1..69c55d01bdf 100644 --- a/src/transformers/quantizers/quantizer_fbgemm_fp8.py +++ b/src/transformers/quantizers/quantizer_fbgemm_fp8.py @@ -104,8 +104,7 @@ class FbgemmFp8HfQuantizer(HfQuantizer): ) elif torch_dtype == torch.float16: raise ValueError( - "You cannot use FP8 with torch_dtype=torch.float16." 
- "We recommend you passing torch_dtype=torch.bfloat16" + "You cannot use FP8 with torch_dtype=torch.float16.We recommend you passing torch_dtype=torch.bfloat16" ) return torch_dtype diff --git a/src/transformers/quantizers/quantizer_torchao.py b/src/transformers/quantizers/quantizer_torchao.py index d15f09be8e1..81c0710288e 100644 --- a/src/transformers/quantizers/quantizer_torchao.py +++ b/src/transformers/quantizers/quantizer_torchao.py @@ -257,8 +257,7 @@ class TorchAoHfQuantizer(HfQuantizer): def is_serializable(self, safe_serialization=None) -> bool: if safe_serialization: logger.warning( - "torchao quantized model does not support safe serialization, " - "please set `safe_serialization` to False" + "torchao quantized model does not support safe serialization, please set `safe_serialization` to False" ) return False _is_torchao_serializable = version.parse(importlib.metadata.version("huggingface_hub")) >= version.parse( diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 0c3c22deeb9..548c4887816 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -868,7 +868,7 @@ class SpecialTokensMixin: def __init__(self, verbose=False, **kwargs): self._pad_token_type_id = 0 self.verbose = verbose - self._special_tokens_map = {attr: None for attr in self.SPECIAL_TOKENS_ATTRIBUTES} + self._special_tokens_map = dict.fromkeys(self.SPECIAL_TOKENS_ATTRIBUTES) self._special_tokens_map["additional_special_tokens"] = [] # for BC where it defaults to empty list # We directly set the hidden value to allow initialization with special tokens @@ -881,9 +881,9 @@ class SpecialTokensMixin: if key in self.SPECIAL_TOKENS_ATTRIBUTES: if key == "additional_special_tokens": assert isinstance(value, (list, tuple)), f"Value {value} is not a list or tuple" - assert all( - isinstance(t, (str, AddedToken)) for t in value - ), "One of the tokens is not a string or an AddedToken" + assert all(isinstance(t, (str, AddedToken)) for t in value), ( + "One of the tokens is not a string or an AddedToken" + ) setattr(self, key, value) elif isinstance(value, (str, AddedToken)): setattr(self, key, value) @@ -967,9 +967,9 @@ class SpecialTokensMixin: logger.info(f"Assigning {value} to the {key} key of the tokenizer") if key == "additional_special_tokens": - assert isinstance(value, (list, tuple)) and all( - isinstance(t, (str, AddedToken)) for t in value - ), f"Tokens {value} for key {key} should all be str or AddedToken instances" + assert isinstance(value, (list, tuple)) and all(isinstance(t, (str, AddedToken)) for t in value), ( + f"Tokens {value} for key {key} should all be str or AddedToken instances" + ) to_add = [] for token in value: @@ -3379,9 +3379,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): return BatchEncoding(encoded_inputs, tensor_type=return_tensors) batch_size = len(required_input) - assert all( - len(v) == batch_size for v in encoded_inputs.values() - ), "Some items in the output dictionary have a different batch size than others." + assert all(len(v) == batch_size for v in encoded_inputs.values()), ( + "Some items in the output dictionary have a different batch size than others." 
+ ) if padding_strategy == PaddingStrategy.LONGEST: max_length = max(len(inputs) for inputs in required_input) diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py index ba54eb0def9..7a4892159f5 100644 --- a/src/transformers/trainer_callback.py +++ b/src/transformers/trainer_callback.py @@ -749,12 +749,12 @@ class EarlyStoppingCallback(TrainerCallback, ExportableState): "Using EarlyStoppingCallback without load_best_model_at_end=True. " "Once training is finished, the best model will not be loaded automatically." ) - assert ( - args.metric_for_best_model is not None - ), "EarlyStoppingCallback requires metric_for_best_model to be defined" - assert ( - args.eval_strategy != IntervalStrategy.NO - ), "EarlyStoppingCallback requires IntervalStrategy of steps or epoch" + assert args.metric_for_best_model is not None, ( + "EarlyStoppingCallback requires metric_for_best_model to be defined" + ) + assert args.eval_strategy != IntervalStrategy.NO, ( + "EarlyStoppingCallback requires IntervalStrategy of steps or epoch" + ) def on_evaluate(self, args, state, control, metrics, **kwargs): metric_to_check = args.metric_for_best_model diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index e12aad9a0bb..b0f5635cf5b 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -121,9 +121,9 @@ def nested_concat(tensors, new_tensors, padding_index=-100): nested list/tuples/dict of tensors. """ if not (isinstance(tensors, torch.Tensor) and isinstance(new_tensors, torch.Tensor)): - assert ( - type(tensors) is type(new_tensors) - ), f"Expected `tensors` and `new_tensors` to have the same type but found {type(tensors)} and {type(new_tensors)}." + assert type(tensors) is type(new_tensors), ( + f"Expected `tensors` and `new_tensors` to have the same type but found {type(tensors)} and {type(new_tensors)}." + ) if isinstance(tensors, (list, tuple)): return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors)) elif isinstance(tensors, torch.Tensor): @@ -381,15 +381,15 @@ class SequentialDistributedSampler(Sampler): # add extra samples to make it evenly divisible indices += indices[: (self.total_size - len(indices))] - assert ( - len(indices) == self.total_size - ), f"Indices length {len(indices)} and total size {self.total_size} mismatched" + assert len(indices) == self.total_size, ( + f"Indices length {len(indices)} and total size {self.total_size} mismatched" + ) # subsample indices = indices[self.rank * self.num_samples : (self.rank + 1) * self.num_samples] - assert ( - len(indices) == self.num_samples - ), f"Indices length {len(indices)} and sample number {self.num_samples} mismatched" + assert len(indices) == self.num_samples, ( + f"Indices length {len(indices)} and sample number {self.num_samples} mismatched" + ) return iter(indices) @@ -506,9 +506,9 @@ class DistributedTensorGatherer: if isinstance(arrays, (list, tuple)): result = [self._nested_set_tensors(x, y) for x, y in zip(storage, arrays)] return result[0][0], type(arrays)(r[1] for r in result) - assert ( - arrays.shape[0] % self.world_size == 0 - ), f"Arrays passed should all have a first dimension multiple of {self.world_size}, found {arrays.shape[0]}." + assert arrays.shape[0] % self.world_size == 0, ( + f"Arrays passed should all have a first dimension multiple of {self.world_size}, found {arrays.shape[0]}." 
+ ) slice_len = arrays.shape[0] // self.world_size for i in range(self.world_size): diff --git a/src/transformers/utils/chat_template_utils.py b/src/transformers/utils/chat_template_utils.py index db1db042e5b..e101fadc2a0 100644 --- a/src/transformers/utils/chat_template_utils.py +++ b/src/transformers/utils/chat_template_utils.py @@ -412,7 +412,7 @@ def _compile_jinja_template(chat_template): if version.parse(jinja2.__version__) < version.parse("3.1.0"): raise ImportError( - "apply_chat_template requires jinja2>=3.1.0 to be installed. Your version is " f"{jinja2.__version__}." + f"apply_chat_template requires jinja2>=3.1.0 to be installed. Your version is {jinja2.__version__}." ) def raise_exception(message): diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 01d19c21405..34c3999d9bb 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -513,7 +513,9 @@ def cached_files( return None # Now we raise for missing entries revision_ = "main" if revision is None else revision - msg = f"a file named {missing_entries[0]}" if len(missing_entries) == 1 else f"files named {*missing_entries,}" + msg = ( + f"a file named {missing_entries[0]}" if len(missing_entries) == 1 else f"files named {(*missing_entries,)}" + ) raise EnvironmentError( f"{path_or_repo_id} does not appear to have {msg}. Checkout 'https://huggingface.co/{path_or_repo_id}/tree/{revision_}'" "for available files." diff --git a/src/transformers/utils/logging.py b/src/transformers/utils/logging.py index 67f70b96edd..150cf8e132a 100644 --- a/src/transformers/utils/logging.py +++ b/src/transformers/utils/logging.py @@ -65,7 +65,7 @@ def _get_default_logging_level(): else: logging.getLogger().warning( f"Unknown option TRANSFORMERS_VERBOSITY={env_level_str}, " - f"has to be one of: { ', '.join(log_levels.keys()) }" + f"has to be one of: {', '.join(log_levels.keys())}" ) return _default_log_level diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index eff8a28459e..94fc1990b57 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -186,7 +186,7 @@ class NotebookProgressBar: if self.average_time_per_item == 0: self.label += ", +inf it/s" else: - self.label += f", {1/self.average_time_per_item:.2f} it/s" + self.label += f", {1 / self.average_time_per_item:.2f} it/s" self.label += "]" if self.comment is None or len(self.comment) == 0 else f", {self.comment}]" self.display() diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py index 60789e786a5..32c249fb511 100644 --- a/src/transformers/utils/quantization_config.py +++ b/src/transformers/utils/quantization_config.py @@ -1636,16 +1636,16 @@ class TorchAoConfig(QuantizationConfigMixin): def from_dict(cls, config_dict, return_unused_kwargs=False, **kwargs): """Create configuration from a dictionary.""" ao_verison = cls._get_ao_version() - assert ao_verison >= version.parse( - "0.10.0" - ), "TorchAoConfig requires torchao >= 0.10.0 for construction from dict" + assert ao_verison >= version.parse("0.10.0"), ( + "TorchAoConfig requires torchao >= 0.10.0 for construction from dict" + ) config_dict = config_dict.copy() quant_type = config_dict.pop("quant_type") # Check if we only have one key which is "default" # In the future we may update this - assert ( - len(quant_type) == 1 and "default" in quant_type - ), "Expected only one key 'default' in quant_type dictionary" + assert len(quant_type) == 1 and "default" in 
quant_type, ( + "Expected only one key 'default' in quant_type dictionary" + ) quant_type = quant_type["default"] # Deserialize quant_type if needed diff --git a/tests/models/fuyu/test_image_processing_fuyu.py b/tests/models/fuyu/test_image_processing_fuyu.py index a9930e2fb81..fd9fea1f741 100644 --- a/tests/models/fuyu/test_image_processing_fuyu.py +++ b/tests/models/fuyu/test_image_processing_fuyu.py @@ -42,9 +42,9 @@ class TestFuyuImageProcessor(unittest.TestCase): expected_num_patches = self.processor.get_num_patches(image_height=self.height, image_width=self.width) patches_final = self.processor.patchify_image(image=self.image_input) - assert ( - patches_final.shape[1] == expected_num_patches - ), f"Expected {expected_num_patches} patches, got {patches_final.shape[1]}." + assert patches_final.shape[1] == expected_num_patches, ( + f"Expected {expected_num_patches} patches, got {patches_final.shape[1]}." + ) def test_scale_to_target_aspect_ratio(self): # (h:450, w:210) fitting (160, 320) -> (160, 210*160/450) diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py index 0f66e168188..b58859a642b 100644 --- a/tests/models/gpt2/test_modeling_gpt2.py +++ b/tests/models/gpt2/test_modeling_gpt2.py @@ -431,9 +431,9 @@ class GPT2ModelTester: model.eval() # We want this for SDPA, eager works with a `None` attention mask - assert ( - model.config._attn_implementation == "sdpa" - ), "This test assumes the model to have the SDPA implementation for its attention calculations." + assert model.config._attn_implementation == "sdpa", ( + "This test assumes the model to have the SDPA implementation for its attention calculations." + ) # Prepare cache and non_cache input, needs a full attention mask cached_len = input_ids.shape[-1] // 2 diff --git a/tests/models/gpt_neox/test_modeling_gpt_neox.py b/tests/models/gpt_neox/test_modeling_gpt_neox.py index 874c62f0c8a..45906d60777 100644 --- a/tests/models/gpt_neox/test_modeling_gpt_neox.py +++ b/tests/models/gpt_neox/test_modeling_gpt_neox.py @@ -222,9 +222,9 @@ class GPTNeoXModelTester: model.eval() # We want this for SDPA, eager works with a `None` attention mask - assert ( - model.config._attn_implementation == "sdpa" - ), "This test assumes the model to have the SDPA implementation for its attention calculations." + assert model.config._attn_implementation == "sdpa", ( + "This test assumes the model to have the SDPA implementation for its attention calculations." 
+        )

         # Prepare cache and non_cache input, needs a full attention mask
         cached_len = input_ids.shape[-1] // 2
diff --git a/tests/models/mask2former/test_image_processing_mask2former.py b/tests/models/mask2former/test_image_processing_mask2former.py
index aaca13dbc36..f2c5cd77946 100644
--- a/tests/models/mask2former/test_image_processing_mask2former.py
+++ b/tests/models/mask2former/test_image_processing_mask2former.py
@@ -315,7 +315,7 @@ class Mask2FormerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase
             inst2class = {}
             for label in class_labels:
                 instance_ids = np.unique(instance_seg[class_id_map == label])
-                inst2class.update({i: label for i in instance_ids})
+                inst2class.update(dict.fromkeys(instance_ids, label))

             return instance_seg, inst2class

diff --git a/tests/models/maskformer/test_image_processing_maskformer.py b/tests/models/maskformer/test_image_processing_maskformer.py
index d042c702a60..d97522261c0 100644
--- a/tests/models/maskformer/test_image_processing_maskformer.py
+++ b/tests/models/maskformer/test_image_processing_maskformer.py
@@ -269,7 +269,7 @@ class MaskFormerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
             inst2class = {}
             for label in class_labels:
                 instance_ids = np.unique(instance_seg[class_id_map == label])
-                inst2class.update({i: label for i in instance_ids})
+                inst2class.update(dict.fromkeys(instance_ids, label))

             return instance_seg, inst2class

diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
index da57a002c4f..4633f497f90 100644
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -1458,9 +1458,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):

         chunked_output = speech_recognizer(inputs.copy(), chunk_length_s=30)
         non_chunked_output = speech_recognizer(inputs.copy())
-        assert (
-            chunked_output.keys() == non_chunked_output.keys()
-        ), "The output structure should be the same for chunked vs non-chunked versions of asr pipelines."
+        assert chunked_output.keys() == non_chunked_output.keys(), (
+            "The output structure should be the same for chunked vs non-chunked versions of asr pipelines."
+        )

     @require_torch
     def test_return_timestamps_ctc_fast(self):
diff --git a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
index 2e3d9fdc7f2..e48717914e0 100644
--- a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
+++ b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
@@ -145,9 +145,9 @@ class DataTrainingArguments:
             train_extension = self.train_file.split(".")[-1]
             assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file."
             validation_extension = self.validation_file.split(".")[-1]
-            assert (
-                validation_extension == train_extension
-            ), "`validation_file` should have the same extension (csv or json) as `train_file`."
+            assert validation_extension == train_extension, (
+                "`validation_file` should have the same extension (csv or json) as `train_file`."
+            )


 @dataclass
@@ -265,9 +265,9 @@ def main():
             if data_args.test_file is not None:
                 train_extension = data_args.train_file.split(".")[-1]
                 test_extension = data_args.test_file.split(".")[-1]
-                assert (
-                    test_extension == train_extension
-                ), "`test_file` should have the same extension (csv or json) as `train_file`."
+                assert test_extension == train_extension, (
+                    "`test_file` should have the same extension (csv or json) as `train_file`."
+                )
                 data_files["test"] = data_args.test_file
             else:
                 raise ValueError("Need either a GLUE task or a test file for `do_predict`.")
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 58c09d41788..5454140a688 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -3234,9 +3234,9 @@ class ModelTesterMixin:
         for model_class in self.all_model_classes:
             model = model_class(config)
             num_params = model.num_parameters()
-            assert (
-                num_params < 1000000
-            ), f"{model_class} is too big for the common tests ({num_params})! It should have 1M max."
+            assert num_params < 1000000, (
+                f"{model_class} is too big for the common tests ({num_params})! It should have 1M max."
+            )

     @require_flash_attn
     @require_torch_gpu
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 6ea5d785231..c0eea11012d 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -3005,9 +3005,9 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             )
             trainer.train()
             # Check that we have the last known step:
-            assert os.path.exists(
-                os.path.join(tmp_dir, f"checkpoint-{trainer.state.max_steps}")
-            ), f"Could not find checkpoint-{trainer.state.max_steps}"
+            assert os.path.exists(os.path.join(tmp_dir, f"checkpoint-{trainer.state.max_steps}")), (
+                f"Could not find checkpoint-{trainer.state.max_steps}"
+            )
             # And then check the last step
             assert os.path.exists(os.path.join(tmp_dir, "checkpoint-9")), "Could not find checkpoint-9"

diff --git a/tests/trainer/test_trainer_seq2seq.py b/tests/trainer/test_trainer_seq2seq.py
index 793225f5ae8..d43edd19fff 100644
--- a/tests/trainer/test_trainer_seq2seq.py
+++ b/tests/trainer/test_trainer_seq2seq.py
@@ -180,9 +180,9 @@ class Seq2seqTrainerTester(TestCasePlus):
         for num_return_sequences in range(3, 0, -1):
             gen_config.num_return_sequences = num_return_sequences
             metrics = trainer.evaluate(eval_dataset=prepared_dataset, generation_config=gen_config)
-            assert (
-                metrics["eval_samples"] == dataset_len * num_return_sequences
-            ), f"Got {metrics['eval_samples']}, expected: {dataset_len * num_return_sequences}"
+            assert metrics["eval_samples"] == dataset_len * num_return_sequences, (
+                f"Got {metrics['eval_samples']}, expected: {dataset_len * num_return_sequences}"
+            )

     @require_torch
     def test_bad_generation_config_fail_early(self):
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index 5721e5913ce..1c4ff3ddc90 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -736,7 +736,7 @@ def replace_default_in_arg_description(description: str, default: Any) -> str:
     elif _re_parse_description.search(description) is None:
         idx = description.find(OPTIONAL_KEYWORD)
         len_optional = len(OPTIONAL_KEYWORD)
-        description = f"{description[:idx + len_optional]}, defaults to {str_default}"
+        description = f"{description[: idx + len_optional]}, defaults to {str_default}"
     else:
         description = _re_parse_description.sub(rf"*optional*, defaults to {str_default}", description)

diff --git a/utils/download_glue_data.py b/utils/download_glue_data.py
index 22e9fcae471..64cd3752024 100644
--- a/utils/download_glue_data.py
+++ b/utils/download_glue_data.py
@@ -79,9 +79,11 @@ def format_mrpc(data_dir, path_to_data):
         for row in ids_fh:
             dev_ids.append(row.strip().split("\t"))

-    with open(mrpc_train_file, encoding="utf8") as data_fh, open(
-        os.path.join(mrpc_dir, "train.tsv"), "w", encoding="utf8"
-    ) as train_fh, open(os.path.join(mrpc_dir, "dev.tsv"), "w", encoding="utf8") as dev_fh:
+    with (
+        open(mrpc_train_file, encoding="utf8") as data_fh,
+        open(os.path.join(mrpc_dir, "train.tsv"), "w", encoding="utf8") as train_fh,
+        open(os.path.join(mrpc_dir, "dev.tsv"), "w", encoding="utf8") as dev_fh,
+    ):
         header = data_fh.readline()
         train_fh.write(header)
         dev_fh.write(header)
@@ -92,9 +94,10 @@ def format_mrpc(data_dir, path_to_data):
             else:
                 train_fh.write("%s\t%s\t%s\t%s\t%s\n" % (label, id1, id2, s1, s2))

-    with open(mrpc_test_file, encoding="utf8") as data_fh, open(
-        os.path.join(mrpc_dir, "test.tsv"), "w", encoding="utf8"
-    ) as test_fh:
+    with (
+        open(mrpc_test_file, encoding="utf8") as data_fh,
+        open(os.path.join(mrpc_dir, "test.tsv"), "w", encoding="utf8") as test_fh,
+    ):
         header = data_fh.readline()
         test_fh.write("index\t#1 ID\t#2 ID\t#1 String\t#2 String\n")
         for idx, row in enumerate(data_fh):
diff --git a/utils/get_github_job_time.py b/utils/get_github_job_time.py
index af59081ffd4..155d859bba6 100644
--- a/utils/get_github_job_time.py
+++ b/utils/get_github_job_time.py
@@ -68,4 +68,4 @@ if __name__ == "__main__":
     job_time = dict(sorted(job_time.items(), key=lambda item: item[1]["duration"], reverse=True))

     for k, v in job_time.items():
-        print(f'{k}: {v["duration"]}')
+        print(f"{k}: {v['duration']}")
diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py
index aedbf696d5b..55e71c4cc91 100644
--- a/utils/modular_model_converter.py
+++ b/utils/modular_model_converter.py
@@ -513,7 +513,7 @@ def find_all_dependencies(
     all_dependencies = set()
     all_dependencies_with_parent = []
     checked_dependencies = set(initial_checked_dependencies)
-    parents = {initial_dep: start_entity for initial_dep in initial_dependencies}
+    parents = dict.fromkeys(initial_dependencies, start_entity)
     while len(dependency_queue) > 0:
         # Pick element to visit
         current = dependency_queue.popleft()
@@ -524,7 +524,7 @@
         if current in dependency_mapping.keys():
             # Update dependency queue
             dependency_queue.extend(dependency_mapping[current])
-            parents.update({dep: current for dep in dependency_mapping[current]})
+            parents.update(dict.fromkeys(dependency_mapping[current], current))
         # add visited node to the list
         checked_dependencies.add(current)

diff --git a/utils/notification_service.py b/utils/notification_service.py
index f3bcfcd4f26..66db34e00c2 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -665,7 +665,7 @@ class Message:

         failure_text = ""
         for idx, error in enumerate(failures):
-            new_text = failure_text + f'*{error["line"]}*\n_{error["trace"]}_\n\n'
+            new_text = failure_text + f"*{error['line']}*\n_{error['trace']}_\n\n"
             if len(new_text) > MAX_ERROR_TEXT:
                 # `failure_text` here has length <= 3000
                 failure_text = failure_text + "[Truncated]"
@@ -728,7 +728,7 @@ class Message:
             if error["line"] in prev_error_lines:
                 continue

-            new_text = f'{error["line"]}\n\n'
+            new_text = f"{error['line']}\n\n"
             if new_text not in all_failure_lines:
                 all_failure_lines[new_text] = []

@@ -794,7 +794,7 @@ class Message:
                     job_result,
                     failures,
                     device,
-                    text=f'Number of failures: {job_result["failed"][device]}',
+                    text=f"Number of failures: {job_result['failed'][device]}",
                 )

                 print("Sending the following reply")
diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py
index 0264797c94e..f15aa68f907 100644
--- a/utils/notification_service_quantization.py
+++ b/utils/notification_service_quantization.py
@@ -152,7 +152,7 @@ class QuantizationMessage(Message):
                     job_result,
                     failures,
                     device,
-                    text=f'Number of failures: {job_result["failed"][device]}',
+                    text=f"Number of failures: {job_result['failed'][device]}",
                 )

                 print("Sending the following reply")
@@ -203,7 +203,7 @@ if __name__ == "__main__":
            "job_link": {},
        }
        for quant in quantization_matrix
-        if f"run_quantization_torch_gpu_{ quant }_test_reports" in available_artifacts
+        if f"run_quantization_torch_gpu_{quant}_test_reports" in available_artifacts
    }

    github_actions_jobs = get_jobs(
@@ -220,7 +220,7 @@ if __name__ == "__main__":
            break

    for quant in quantization_results.keys():
-        for artifact_path in available_artifacts[f"run_quantization_torch_gpu_{ quant }_test_reports"].paths:
+        for artifact_path in available_artifacts[f"run_quantization_torch_gpu_{quant}_test_reports"].paths:
            artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
            if "stats" in artifact:
                # Link to the GitHub Action job
diff --git a/utils/past_ci_versions.py b/utils/past_ci_versions.py
index 61495ab2a46..858f7184d70 100644
--- a/utils/past_ci_versions.py
+++ b/utils/past_ci_versions.py
@@ -116,8 +116,8 @@ if __name__ == "__main__":

    info = past_versions_testing[args.framework][args.version]

-    os.system(f'echo "export INSTALL_CMD=\'{info["install"]}\'" >> ~/.profile')
-    print(f'echo "export INSTALL_CMD=\'{info["install"]}\'" >> ~/.profile')
+    os.system(f"echo \"export INSTALL_CMD='{info['install']}'\" >> ~/.profile")
+    print(f"echo \"export INSTALL_CMD='{info['install']}'\" >> ~/.profile")

    cuda = ""
    if args.framework == "pytorch":
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index 944bc47a7e2..eb61f6d586e 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -37,12 +37,12 @@ if __name__ == "__main__":
    for job in jobs:
        project_slug = job["project_slug"]
        if job["name"].startswith(("tests_", "examples_", "pipelines_")):
-            url = f'https://circleci.com/api/v2/project/{project_slug}/{job["job_number"]}/artifacts'
+            url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts"
            r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
            job_artifacts = r.json()["items"]

            os.makedirs(job["name"], exist_ok=True)
-            os.makedirs(f'outputs/{job["name"]}', exist_ok=True)
+            os.makedirs(f"outputs/{job['name']}", exist_ok=True)

            job_test_summaries = {}
            for artifact in job_artifacts:
@@ -67,7 +67,7 @@ if __name__ == "__main__":
            workflow_summary[job["name"]] = summary

            # collected version
-            with open(f'outputs/{job["name"]}/test_summary.json', "w") as fp:
+            with open(f"outputs/{job['name']}/test_summary.json", "w") as fp:
                json.dump(summary, fp, indent=4)

    new_workflow_summary = {}
diff --git a/utils/update_metadata.py b/utils/update_metadata.py
index d2023ff4679..d8df28c2a32 100755
--- a/utils/update_metadata.py
+++ b/utils/update_metadata.py
@@ -247,7 +247,7 @@ def update_pipeline_and_auto_class_table(table: Dict[str, Tuple[str, str]]) -> D
                model_names.extend(list(name))

        # Add pipeline tag and auto model class for those models
-        table.update({model_name: (pipeline_tag, cls) for model_name in model_names})
+        table.update(dict.fromkeys(model_names, (pipeline_tag, cls)))

    return table
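
Aside, not part of the patch above: a minimal sketch of the dict-comprehension to dict.fromkeys rewrite that recurs throughout these hunks (the kind of change ruff's flake8-comprehensions rules suggest). All names below are made up for illustration; the two forms are equivalent for the immutable values used in these files, with one caveat shown at the end.

    keys = ["single", "multi"]
    label = 0

    # Comprehension mapping every key to the same constant value ...
    before = {k: label for k in keys}
    # ... and the equivalent dict.fromkeys call the diff switches to.
    after = dict.fromkeys(keys, label)
    assert before == after == {"single": 0, "multi": 0}

    # Caveat: dict.fromkeys stores the *same* object for every key, so it is only a
    # drop-in replacement for immutable values (ints, strings, tuples), as above.
    shared = dict.fromkeys(keys, [])
    shared["single"].append("x")
    assert shared["multi"] == ["x"]  # both keys point at the one shared list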