Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-03 04:40:06 +06:00)
More PYUP fixes (#38883)
More pyup fixes

Signed-off-by: cyy <cyyever@outlook.com>

Parent commit: 12d4c5b66f
This commit: 1fc67a25c6
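The hunks below are mechanical Python modernizations of the kind applied by pyupgrade-style tooling: `.format()` calls become f-strings, `@lru_cache()` loses its empty call parentheses, `functools.lru_cache(maxsize=None)` becomes `functools.cache`, redundant parentheses and redundant `str()`/`float()` wrappers around literals are dropped, quoted type annotations are unquoted where the name is available, and `io.open` becomes the builtin `open`. A minimal, illustrative sketch of the before/after behaviour (the helper names `squared` and `cubed` are stand-ins, not code from this commit):

```python
from functools import cache, lru_cache


@lru_cache  # bare decorator: equivalent to @lru_cache() on Python 3.8+
def squared(x: int) -> int:
    return x * x


@cache  # Python 3.9+ shorthand for lru_cache(maxsize=None)
def cubed(x: int) -> int:
    return x * x * x


name = "world"
old_style = "Hello {}".format(name)  # the form this commit removes
new_style = f"Hello {name}"          # the form this commit keeps
assert old_style == new_style
assert squared(3) == 9 and cubed(2) == 8
```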
@@ -710,8 +710,8 @@ class AssistantToTargetTranslator:
 assistant_model: Optional["PreTrainedModel"] = None,
 assistant_prune_lm_head: bool = False,
 ):
-self._target_tokenizer: "PreTrainedTokenizerBase" = target_tokenizer
-self._assistant_tokenizer: "PreTrainedTokenizerBase" = assistant_tokenizer
+self._target_tokenizer: PreTrainedTokenizerBase = target_tokenizer
+self._assistant_tokenizer: PreTrainedTokenizerBase = assistant_tokenizer
 self._assistant_model_device: str = (
 assistant_model_device if assistant_model is None else assistant_model.device
 )
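Dropping the quotes around an annotation such as `PreTrainedTokenizerBase` only changes how the annotation is written, not runtime behaviour, provided the name is resolvable wherever the annotation is evaluated (or annotations are not evaluated at all, e.g. under `from __future__ import annotations`). String annotations remain necessary only for true forward references. A minimal sketch with hypothetical stand-in classes (`Tokenizer`, `Translator` are not from this diff):

```python
from __future__ import annotations  # PEP 563: annotations are kept as lazy strings


class Tokenizer:  # hypothetical stand-in for PreTrainedTokenizerBase
    pass


class Translator:
    def __init__(self, tokenizer: Tokenizer) -> None:
        # With the future import (or with the class imported at runtime),
        # the unquoted annotation below behaves the same as "Tokenizer" in quotes.
        self._tokenizer: Tokenizer = tokenizer


print(Translator(Tokenizer())._tokenizer)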
@@ -72,7 +72,7 @@ class TextStreamer(BaseStreamer):
 ```
 """

-def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, **decode_kwargs):
+def __init__(self, tokenizer: AutoTokenizer, skip_prompt: bool = False, **decode_kwargs):
 self.tokenizer = tokenizer
 self.skip_prompt = skip_prompt
 self.decode_kwargs = decode_kwargs
@@ -206,7 +206,7 @@ class TextIteratorStreamer(TextStreamer):
 """

 def __init__(
-self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
+self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
 ):
 super().__init__(tokenizer, skip_prompt, **decode_kwargs)
 self.text_queue = Queue()
@@ -284,7 +284,7 @@ class AsyncTextIteratorStreamer(TextStreamer):
 """

 def __init__(
-self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
+self, tokenizer: AutoTokenizer, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
 ):
 super().__init__(tokenizer, skip_prompt, **decode_kwargs)
 self.text_queue = asyncio.Queue()
@@ -4723,7 +4723,7 @@ class GenerationMixin(ContinuousMixin):
 )

 if return_dict_in_generate and output_scores:
-beam_indices = tuple((beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices))))
+beam_indices = tuple(beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices)))

 # increase cur_len
 cur_len = cur_len + 1
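The change above is purely syntactic: a generator expression that is the sole argument of a call does not need its own parentheses. A small, self-contained sketch of the equivalence (toy data, not from the repository):

```python
beam_indices = ((0,), (1,), (2,))
beam_idx = [2, 0, 1]

# Redundant inner parentheses (what the diff removes)...
old = tuple((beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices))))
# ...and the equivalent form without them (what the diff keeps).
new = tuple(beam_indices[beam_idx[i]] + (beam_idx[i],) for i in range(len(beam_indices)))

assert old == new == ((2, 2), (0, 0), (1, 1))
```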
@@ -1626,8 +1626,8 @@ class NeptuneCallback(TrainerCallback):
 target_path = consistent_checkpoint_path
 except OSError as e:
 logger.warning(
-"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'. "
-"Could fail trying to upload.".format(e)
+f"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{e}'. "
+"Could fail trying to upload."
 )

 self._metadata_namespace[self._target_checkpoints_namespace].upload_files(target_path)
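In the old form the two adjacent string literals are concatenated by the parser before `.format(e)` is applied, so the old and new forms render the same message; only the style changes (the pre-existing "unable to made" wording is left untouched). A minimal sketch:

```python
e = OSError("disk full")

old = (
    "NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'. "
    "Could fail trying to upload.".format(e)
)
new = (
    f"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{e}'. "
    "Could fail trying to upload."
)
assert old == new
```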
@@ -1976,9 +1976,7 @@ class ClearMLCallback(TrainerCallback):
 )
 except Exception as e:
 logger.warning(
-"Could not remove checkpoint `{}` after going over the `save_total_limit`. Error is: {}".format(
-self._checkpoints_saved[0].name, e
-)
+f"Could not remove checkpoint `{self._checkpoints_saved[0].name}` after going over the `save_total_limit`. Error is: {e}"
 )
 break
 self._checkpoints_saved = self._checkpoints_saved[1:]
@@ -1409,10 +1409,10 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT

 def prepare_tf_dataset(
 self,
-dataset: "datasets.Dataset", # noqa:F821
+dataset: datasets.Dataset, # noqa:F821
 batch_size: int = 8,
 shuffle: bool = True,
-tokenizer: Optional["PreTrainedTokenizerBase"] = None,
+tokenizer: Optional[PreTrainedTokenizerBase] = None,
 collate_fn: Optional[Callable] = None,
 collate_fn_args: Optional[dict[str, Any]] = None,
 drop_remainder: Optional[bool] = None,
@@ -4424,10 +4424,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMi
 raise ValueError("DeepSpeed Zero-3 is not compatible with passing a `device_map`.")
 if not is_accelerate_available():
 raise ValueError(
-(
-"Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` "
-"requires `accelerate`. You can install it with `pip install accelerate`"
-)
+"Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` "
+"requires `accelerate`. You can install it with `pip install accelerate`"
 )

 # handling bnb config from kwargs, remove after `load_in_{4/8}bit` deprecation.
@@ -203,7 +203,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
 pieces = self.sp_model.encode(text, out_type=str)
 new_pieces = []
 for piece in pieces:
-if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
+if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
 # Logic to handle special cases see https://github.com/google-research/bert/blob/master/README.md#tokenization
 # `9,9` -> ['▁9', ',', '9'] instead of [`_9,`, '9']
 cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
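`str(",")` is already a `str`, so the wrapper is a no-op; the same reasoning applies to the `float(-100.0)` removals later in the diff. A tiny illustrative check (the `piece` value is made up):

```python
piece = "19,"

old = len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit()
new = len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit()

assert str(",") == ","
assert old is True and new is True
```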
@@ -830,7 +830,7 @@ class BambaMixer(nn.Module):

 # 2. Compute the state for each intra-chunk
 # (right term of low-rank factorization of off-diagonal blocks; B terms)
-decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
 B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
 states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

@@ -632,7 +632,7 @@ class BambaMixer(nn.Module):

 # 2. Compute the state for each intra-chunk
 # (right term of low-rank factorization of off-diagonal blocks; B terms)
-decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
 B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
 states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

@@ -32,7 +32,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
 # See all BART models at https://huggingface.co/models?filter=bart


-@lru_cache()
+@lru_cache
 def bytes_to_unicode():
 """
 Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
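Since Python 3.8, `functools.lru_cache` can be applied directly as a decorator, so `@lru_cache` and `@lru_cache()` configure the same default cache (maxsize=128). A minimal sketch with a hypothetical helper standing in for `bytes_to_unicode`:

```python
from functools import lru_cache


@lru_cache
def table_size() -> int:
    # hypothetical stand-in for the real bytes_to_unicode()
    return 256


@lru_cache()
def table_size_with_parens() -> int:
    return 256


assert table_size() == table_size_with_parens() == 256
assert table_size.cache_info().maxsize == 128  # same default either way
```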
@@ -110,7 +110,7 @@ class BeitDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 # Based on timm implementation, which can be found here:
@@ -513,8 +513,8 @@ class BeitLayer(nn.Module):

 init_values = config.layer_scale_init_value
 if init_values > 0:
-self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
-self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
+self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
+self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
 else:
 self.lambda_1, self.lambda_2 = None, None

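`torch.ones((config.hidden_size))` and `torch.ones(config.hidden_size)` are the same call: `(x)` is just a parenthesized integer, not a 1-tuple, so the change is cosmetic. A short sketch (assumes `torch` is installed):

```python
import torch

hidden_size = 4
a = torch.ones((hidden_size))   # (hidden_size) is merely an int in parentheses
b = torch.ones(hidden_size)
c = torch.ones((hidden_size,))  # a real 1-tuple gives the same shape here too

assert a.shape == b.shape == c.shape == (hidden_size,)
assert (4) == 4 and (4,) != 4   # only the trailing comma makes a tuple
```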
@@ -934,7 +934,7 @@ class SentencepieceTokenizer:
 pieces = self.sp_model.encode(text, out_type=str)
 new_pieces = []
 for piece in pieces:
-if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
+if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
 cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
 if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
 if len(cur_pieces[0]) == 1:
@@ -115,7 +115,7 @@ class Dictionary:
 except FileNotFoundError as fnfe:
 raise fnfe
 except UnicodeError:
-raise Exception("Incorrect encoding detected in {}, please rebuild the dataset".format(f))
+raise Exception(f"Incorrect encoding detected in {f}, please rebuild the dataset")
 return

 lines = f.readlines()
@@ -133,11 +133,11 @@ class Dictionary:
 word = line
 if word in self and not overwrite:
 raise RuntimeError(
-"Duplicate word found when loading Dictionary: '{}'. "
+f"Duplicate word found when loading Dictionary: '{word}'. "
 "Duplicate words can overwrite earlier ones by adding the "
 "#fairseq:overwrite flag at the end of the corresponding row "
 "in the dictionary file. If using the Camembert model, please "
-"download an updated copy of the model file.".format(word)
+"download an updated copy of the model file."
 )
 self.add_symbol(word, n=count, overwrite=overwrite)
 except ValueError:
@@ -310,7 +310,7 @@ class BitDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 def make_div(value, divisor=8):
@@ -35,7 +35,7 @@ VOCAB_FILES_NAMES = {
 }


-@lru_cache()
+@lru_cache
 # Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
 def bytes_to_unicode():
 """
@@ -641,9 +641,7 @@ class BlipTextModel(BlipTextPreTrainedModel):
 extended_attention_mask = attention_mask[:, None, None, :]
 else:
 raise ValueError(
-"Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
-input_shape, attention_mask.shape
-)
+f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
 )

 # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
@@ -723,7 +721,7 @@ class BlipTextModel(BlipTextPreTrainedModel):
 past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0

 if attention_mask is None:
-attention_mask = torch.ones(((batch_size, seq_length + past_key_values_length))).to(device)
+attention_mask = torch.ones((batch_size, seq_length + past_key_values_length)).to(device)

 # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
 # ourselves in which case we just need to make it broadcastable to all heads.
@@ -800,9 +800,7 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
 extended_attention_mask = attention_mask[:, None, None, :]
 else:
 raise ValueError(
-"Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
-input_shape, attention_mask.shape
-)
+f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
 )

 # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
@@ -881,7 +879,7 @@ class TFBlipTextModel(TFBlipTextPreTrainedModel):
 past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0

 if attention_mask is None:
-attention_mask = tf.ones(((batch_size, seq_length + past_key_values_length)))
+attention_mask = tf.ones((batch_size, seq_length + past_key_values_length))

 # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
 # ourselves in which case we just need to make it broadcastable to all heads.
@@ -1144,9 +1144,7 @@ class Blip2QFormerModel(Blip2PreTrainedModel):
 extended_attention_mask = attention_mask[:, None, None, :]
 else:
 raise ValueError(
-"Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
-input_shape, attention_mask.shape
-)
+f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
 )

 # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
@@ -98,7 +98,7 @@ def convert_bloom_checkpoint_to_pytorch(
 config = BloomConfig()

 for j, file in enumerate(file_names):
-print("Processing file: {}".format(file))
+print(f"Processing file: {file}")
 tensors = None

 for i in range(pretraining_tp):
@@ -132,7 +132,7 @@ def convert_bloom_checkpoint_to_pytorch(
 tensors,
 os.path.join(
 pytorch_dump_folder_path,
-"pytorch_model_{}-of-{}.bin".format(str(j + 1).zfill(5), str(len(file_names)).zfill(5)),
+f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(len(file_names)).zfill(5)}.bin",
 ),
 )

@@ -140,8 +140,8 @@ def convert_bloom_checkpoint_to_pytorch(
 value = tensors[key]
 total_size += value.numel() * get_dtype_size(value.dtype)
 if key not in index_dict["weight_map"]:
-index_dict["weight_map"][key] = "pytorch_model_{}-of-{}.bin".format(
-str(j + 1).zfill(5), str(len(file_names)).zfill(5)
+index_dict["weight_map"][key] = (
+f"pytorch_model_{str(j + 1).zfill(5)}-of-{str(len(file_names)).zfill(5)}.bin"
 )

 config = BloomConfig()
@@ -610,7 +610,7 @@ class ClapAudioLayer(nn.Module):
 mask_windows = window_partition(img_mask, self.window_size)
 mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
 attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
-attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
 else:
 attn_mask = None
 return attn_mask
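Wrapping a float literal in `float()` is a no-op, so `masked_fill(mask, float(-100.0))` and `masked_fill(mask, -100.0)` produce identical tensors. A short sketch with toy data (assumes `torch` is installed):

```python
import torch

attn_mask = torch.tensor([[0.0, 1.0], [2.0, 0.0]])

old = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
new = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)

assert float(-100.0) == -100.0
assert torch.equal(old, new)
```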
@@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {
 }


-@lru_cache()
+@lru_cache
 def bytes_to_unicode():
 """
 Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
@@ -488,7 +488,7 @@ class CLIPTokenizer(PreTrainedTokenizer):

 def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]:
 if not os.path.isdir(save_directory):
-logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
+logger.error(f"Vocabulary path ({save_directory}) should be a directory")
 return
 vocab_file = os.path.join(
 save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
@@ -506,8 +506,8 @@ class CLIPTokenizer(PreTrainedTokenizer):
 for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]):
 if index != token_index:
 logger.warning(
-"Saving vocabulary to {}: BPE merge indices are not consecutive."
-" Please check that the tokenizer is not corrupted!".format(merge_file)
+f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive."
+" Please check that the tokenizer is not corrupted!"
 )
 index = token_index
 writer.write(" ".join(bpe_tokens) + "\n")
@@ -181,7 +181,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
 missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)

 if missing_keys != ["clip.text_model.embeddings.position_ids", "clip.vision_model.embeddings.position_ids"]:
-raise ValueError("Missing keys that are not expected: {}".format(missing_keys))
+raise ValueError(f"Missing keys that are not expected: {missing_keys}")
 if unexpected_keys != ["decoder.reduce.weight", "decoder.reduce.bias"]:
 raise ValueError(f"Unexpected keys: {unexpected_keys}")

@@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {
 }


-@lru_cache()
+@lru_cache
 # Copied from transformers.models.gpt2.tokenization_gpt2.bytes_to_unicode
 def bytes_to_unicode():
 """
@@ -42,7 +42,7 @@ VOCAB_FILES_NAMES = {
 }


-@lru_cache()
+@lru_cache
 def bytes_to_unicode():
 """
 Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
@@ -70,7 +70,7 @@ class ConvNextDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class ConvNextLayerNorm(nn.Module):
@@ -149,7 +149,7 @@ class ConvNextLayer(nn.Module):
 self.act = ACT2FN[config.hidden_act]
 self.pwconv2 = nn.Linear(4 * dim, dim)
 self.layer_scale_parameter = (
-nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
+nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
 if config.layer_scale_init_value > 0
 else None
 )
@@ -70,7 +70,7 @@ class ConvNextV2DropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class ConvNextV2GRN(nn.Module):
@@ -207,7 +207,7 @@ class CpmTokenizer(PreTrainedTokenizer):
 pieces = self.sp_model.encode(text, out_type=str)
 new_pieces = []
 for piece in pieces:
-if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
+if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
 cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
 if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
 if len(cur_pieces[0]) == 1:
@@ -86,7 +86,7 @@ class CvtDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class CvtEmbeddings(nn.Module):
@@ -187,7 +187,7 @@ class DFineMultiscaleDeformableAttention(nn.Module):
 sampling_locations = reference_points[:, :, None, :, :2] + offset
 else:
 raise ValueError(
-"Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1])
+f"Last dim of reference_points must be 2 or 4, but get {reference_points.shape[-1]} instead."
 )

 output = self.ms_deformable_attn_core(
@@ -517,7 +517,7 @@ class DFineMultiscaleDeformableAttention(nn.Module):
 sampling_locations = reference_points[:, :, None, :, :2] + offset
 else:
 raise ValueError(
-"Last dim of reference_points must be 2 or 4, but get {} instead.".format(reference_points.shape[-1])
+f"Last dim of reference_points must be 2 or 4, but get {reference_points.shape[-1]} instead."
 )

 output = self.ms_deformable_attn_core(
@@ -384,7 +384,7 @@ def gen_sine_position_embeddings(pos_tensor, hidden_size=256):

 pos = torch.cat((pos_y, pos_x, pos_w, pos_h), dim=2)
 else:
-raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1)))
+raise ValueError(f"Unknown pos_tensor shape(-1):{pos_tensor.size(-1)}")
 return pos


@@ -1254,7 +1254,7 @@ class DabDetrModel(DabDetrPreTrainedModel):

 self.num_patterns = config.num_patterns
 if not isinstance(self.num_patterns, int):
-logger.warning("num_patterns should be int but {}".format(type(self.num_patterns)))
+logger.warning(f"num_patterns should be int but {type(self.num_patterns)}")
 self.num_patterns = 0
 if self.num_patterns > 0:
 self.patterns = nn.Embedding(self.num_patterns, self.hidden_size)
@@ -157,24 +157,12 @@ def recursively_load_weights(orig_dict, hf_model, model_name):
 elif len(mapped_key) == 3:
 integers = re.findall(r"\b\d+\b", name)
 if mapped_key[0][0] == "d":
-mapped_key = "{}.{}.{}{}.{}".format(
-mapped_key[0],
-str(int(integers[0]) - 1),
-mapped_key[1],
-str(int(integers[1]) - 1),
-mapped_key[2],
-)
+mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}{str(int(integers[1]) - 1)}.{mapped_key[2]}"
 else:
-mapped_key = "{}.{}.{}{}.{}".format(
-mapped_key[0],
-str(int(integers[0]) - 1),
-mapped_key[1],
-str(int(integers[1]) + 1),
-mapped_key[2],
-)
+mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}{str(int(integers[1]) + 1)}.{mapped_key[2]}"
 elif len(mapped_key) == 2:
 integers = re.findall(r"\b\d+\b", name)
-mapped_key = "{}.{}.{}".format(mapped_key[0], str(int(integers[0]) - 1), mapped_key[1])
+mapped_key = f"{mapped_key[0]}.{str(int(integers[0]) - 1)}.{mapped_key[1]}"

 is_used = True
 if "weight_g" in name:
@@ -185,18 +185,12 @@ def load_beit_model(args, is_finetuned, is_large):
 missing_keys = warn_missing_keys

 if len(missing_keys) > 0:
-print(
-"Weights of {} not initialized from pretrained model: {}".format(
-model.__class__.__name__, missing_keys
-)
-)
+print(f"Weights of {model.__class__.__name__} not initialized from pretrained model: {missing_keys}")
 if len(unexpected_keys) > 0:
-print("Weights from pretrained model not used in {}: {}".format(model.__class__.__name__, unexpected_keys))
+print(f"Weights from pretrained model not used in {model.__class__.__name__}: {unexpected_keys}")
 if len(ignore_missing_keys) > 0:
 print(
-"Ignored weights of {} not initialized from pretrained model: {}".format(
-model.__class__.__name__, ignore_missing_keys
-)
+f"Ignored weights of {model.__class__.__name__} not initialized from pretrained model: {ignore_missing_keys}"
 )
 if len(error_msgs) > 0:
 print("\n".join(error_msgs))
@@ -101,7 +101,7 @@ class Data2VecVisionDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 # Copied from transformers.models.beit.modeling_beit.BeitEmbeddings with Beit->Data2VecVision
@@ -515,8 +515,8 @@ class Data2VecVisionLayer(nn.Module):

 init_values = config.layer_scale_init_value
 if init_values > 0:
-self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
-self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
+self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
+self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
 else:
 self.lambda_1, self.lambda_2 = None, None

@@ -306,7 +306,7 @@ class TFData2VecVisionSelfAttention(keras.layers.Layer):
 hidden_states: tf.Tensor,
 head_mask: tf.Tensor,
 output_attentions: bool,
-relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None,
+relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
 training: bool = False,
 ) -> tuple[tf.Tensor]:
 batch_size = shape_list(hidden_states)[0]
@@ -416,7 +416,7 @@ class TFData2VecVisionAttention(keras.layers.Layer):
 input_tensor: tf.Tensor,
 head_mask: tf.Tensor,
 output_attentions: bool,
-relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None,
+relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
 training: bool = False,
 ) -> tuple[tf.Tensor]:
 self_outputs = self.attention(
@@ -538,8 +538,8 @@ class TFData2VecVisionLayer(keras.layers.Layer):
 trainable=True,
 name="lambda_2",
 )
-self.lambda_1.assign(self.init_values * tf.ones((self.config.hidden_size)))
-self.lambda_2.assign(self.init_values * tf.ones((self.config.hidden_size)))
+self.lambda_1.assign(self.init_values * tf.ones(self.config.hidden_size))
+self.lambda_2.assign(self.init_values * tf.ones(self.config.hidden_size))
 else:
 self.lambda_1, self.lambda_2 = None, None

@@ -570,7 +570,7 @@ class TFData2VecVisionLayer(keras.layers.Layer):
 hidden_states: tf.Tensor,
 head_mask: tf.Tensor,
 output_attentions: bool,
-relative_position_bias: Optional["TFData2VecVisionRelativePositionBias"] = None,
+relative_position_bias: Optional[TFData2VecVisionRelativePositionBias] = None,
 training: bool = False,
 ) -> tuple[tf.Tensor]:
 self_attention_outputs = self.attention(
@@ -113,7 +113,7 @@ class DeepseekV3TopkRouter(nn.Module):
 self.norm_topk_prob = config.norm_topk_prob

 self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size)))
-self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts)))
+self.register_buffer("e_score_correction_bias", torch.zeros(self.n_routed_experts))

 @torch.no_grad()
 def get_topk_indices(self, scores):
@@ -110,7 +110,7 @@ class DeepseekV3TopkRouter(nn.Module):
 self.norm_topk_prob = config.norm_topk_prob

 self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size)))
-self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts)))
+self.register_buffer("e_score_correction_bias", torch.zeros(self.n_routed_experts))

 @torch.no_grad()
 def get_topk_indices(self, scores):
@@ -270,7 +270,7 @@ class EfficientFormerDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class EfficientFormerFlat(nn.Module):
@@ -303,8 +303,8 @@ class EfficientFormerMeta3D(nn.Module):
 self.drop_path = EfficientFormerDropPath(drop_path) if drop_path > 0.0 else nn.Identity()
 self.use_layer_scale = config.use_layer_scale
 if config.use_layer_scale:
-self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
-self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
+self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
+self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)

 def forward(self, hidden_states: torch.Tensor, output_attentions: bool = False) -> tuple[torch.Tensor]:
 self_attention_outputs = self.token_mixer(self.layernorm1(hidden_states), output_attentions)
@@ -370,8 +370,8 @@ class EfficientFormerMeta4D(nn.Module):
 self.drop_path = EfficientFormerDropPath(drop_path) if drop_path > 0.0 else nn.Identity()
 self.use_layer_scale = config.use_layer_scale
 if config.use_layer_scale:
-self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
-self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
+self.layer_scale_1 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
+self.layer_scale_2 = nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)

 def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor]:
 outputs = self.token_mixer(hidden_states)
|
||||
# limitations under the License.
|
||||
"""Tokenization classes for Ernie-M."""
|
||||
|
||||
import io
|
||||
import os
|
||||
import unicodedata
|
||||
from typing import Any, Optional
|
||||
@ -172,7 +171,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
|
||||
|
||||
def clean_text(self, text):
|
||||
"""Performs invalid character removal and whitespace cleanup on text."""
|
||||
return "".join((self.SP_CHAR_MAPPING.get(c, c) for c in text))
|
||||
return "".join(self.SP_CHAR_MAPPING.get(c, c) for c in text)
|
||||
|
||||
def _tokenize(self, text, enable_sampling=False, nbest_size=64, alpha=0.1):
|
||||
"""Tokenize a string."""
|
||||
@ -373,7 +372,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
|
||||
|
||||
def load_vocab(self, filepath):
|
||||
token_to_idx = {}
|
||||
with io.open(filepath, "r", encoding="utf-8") as f:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
for index, line in enumerate(f):
|
||||
token = line.rstrip("\n")
|
||||
token_to_idx[token] = int(index)
|
||||
|
@ -118,7 +118,7 @@ class MegaSimpleRelativePositionalBias(nn.Module):
|
||||
|
||||
def forward(self, seq_len):
|
||||
if seq_len > self.max_positions:
|
||||
raise ValueError("Sequence length {} going beyond max length {}".format(seq_len, self.max_positions))
|
||||
raise ValueError(f"Sequence length {seq_len} going beyond max length {self.max_positions}")
|
||||
|
||||
# seq_len * 2 - 1
|
||||
bias = self.rel_pos_bias[(self.max_positions - seq_len) : (self.max_positions + seq_len - 1)]
|
||||
@ -298,7 +298,7 @@ class MegaSequenceNorm(nn.Module):
|
||||
elif norm_type == "syncbatchnorm":
|
||||
self.norm = nn.SyncBatchNorm(embedding_dim, eps=eps, affine=affine)
|
||||
else:
|
||||
raise ValueError("Unknown norm type: {}".format(norm_type))
|
||||
raise ValueError(f"Unknown norm type: {norm_type}")
|
||||
|
||||
def forward(self, input):
|
||||
if isinstance(self.norm, nn.modules.batchnorm._BatchNorm):
|
||||
@ -563,7 +563,7 @@ class MegaGatedCrossAttention(nn.Module):
|
||||
elif self.config.relative_positional_bias == "rotary":
|
||||
self.rel_pos_bias = MegaRotaryRelativePositionalBias(config)
|
||||
else:
|
||||
raise ValueError("unknown relative position bias: {}".format(self.config.relative_positional_bias))
|
||||
raise ValueError(f"unknown relative position bias: {self.config.relative_positional_bias}")
|
||||
|
||||
self.softmax = nn.Softmax(dim=-1)
|
||||
|
||||
|
@ -287,7 +287,7 @@ class NatDropPath(nn.Module):
|
||||
return drop_path(hidden_states, self.drop_prob, self.training)
|
||||
|
||||
def extra_repr(self) -> str:
|
||||
return "p={}".format(self.drop_prob)
|
||||
return f"p={self.drop_prob}"
|
||||
|
||||
|
||||
class NeighborhoodAttention(nn.Module):
|
||||
|
@ -99,7 +99,7 @@ TAPEX_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
|
||||
"""
|
||||
|
||||
|
||||
@lru_cache()
|
||||
@lru_cache
|
||||
def bytes_to_unicode():
|
||||
"""
|
||||
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
|
||||
|
@ -79,7 +79,7 @@ class VanDropPath(nn.Module):
|
||||
return drop_path(hidden_states, self.drop_prob, self.training)
|
||||
|
||||
def extra_repr(self) -> str:
|
||||
return "p={}".format(self.drop_prob)
|
||||
return f"p={self.drop_prob}"
|
||||
|
||||
|
||||
class VanOverlappingPatchEmbedder(nn.Module):
|
||||
@ -204,7 +204,7 @@ class VanLayerScaling(nn.Module):
|
||||
|
||||
def __init__(self, hidden_size: int, initial_value: float = 1e-2):
|
||||
super().__init__()
|
||||
self.weight = nn.Parameter(initial_value * torch.ones((hidden_size)), requires_grad=True)
|
||||
self.weight = nn.Parameter(initial_value * torch.ones(hidden_size), requires_grad=True)
|
||||
|
||||
def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
|
||||
# unsqueezing for broadcasting
|
||||
|
@ -275,7 +275,7 @@ class DinatDropPath(nn.Module):
|
||||
return drop_path(hidden_states, self.drop_prob, self.training)
|
||||
|
||||
def extra_repr(self) -> str:
|
||||
return "p={}".format(self.drop_prob)
|
||||
return f"p={self.drop_prob}"
|
||||
|
||||
|
||||
class NeighborhoodAttention(nn.Module):
|
||||
|
@@ -343,7 +343,7 @@ class Dinov2DropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class Dinov2MLP(nn.Module):
@@ -360,7 +360,7 @@ class Dinov2WithRegistersDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class Dinov2WithRegistersMLP(nn.Module):
@@ -393,7 +393,7 @@ class DonutSwinDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 # Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->DonutSwin
@@ -625,7 +625,7 @@ class DonutSwinLayer(nn.Module):
 mask_windows = window_partition(img_mask, self.window_size)
 mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
 attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
-attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
 else:
 attn_mask = None
 return attn_mask
@@ -1414,7 +1414,7 @@ class EsmFoldInvariantPointAttention(nn.Module):

 self.linear_b = EsmFoldLinear(c_z, config.num_heads_ipa)

-self.head_weights = nn.Parameter(torch.zeros((config.num_heads_ipa)))
+self.head_weights = nn.Parameter(torch.zeros(config.num_heads_ipa))

 concat_out_dim = config.num_heads_ipa * (c_z + config.ipa_dim + config.num_v_points * 4)
 self.linear_out = EsmFoldLinear(concat_out_dim, c_s, init="final")
@@ -398,7 +398,7 @@ def map_structure_with_atom_order(in_list: list, first_call: bool = True) -> lis
 return in_list


-@functools.lru_cache(maxsize=None)
+@functools.cache
 def load_stereo_chemical_props() -> tuple[
 Mapping[str, list[Bond]],
 Mapping[str, list[Bond]],
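`functools.cache` (Python 3.9+) is a thin wrapper equal to `lru_cache(maxsize=None)`, so for argument-less loaders like the one above the two decorators are interchangeable. A minimal sketch with a hypothetical stand-in loader (`load_props` is not from this diff):

```python
import functools

calls = {"n": 0}


@functools.cache  # equivalent to @functools.lru_cache(maxsize=None)
def load_props() -> dict:
    # hypothetical stand-in for load_stereo_chemical_props()
    calls["n"] += 1
    return {"bond_length": 1.52}


assert load_props() is load_props()  # cached: the same object is returned
assert calls["n"] == 1               # the underlying loader ran only once
assert load_props.cache_info().maxsize is None
```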
@@ -16,7 +16,7 @@
 from __future__ import annotations

 from collections.abc import Sequence
-from functools import lru_cache
+from functools import cache
 from typing import Any, Callable, Optional

 import numpy as np
@@ -75,7 +75,7 @@ def rot_vec_mul(r: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
 )


-@lru_cache(maxsize=None)
+@cache
 def identity_rot_mats(
 batch_dims: tuple[int, ...],
 dtype: Optional[torch.dtype] = None,
@@ -90,7 +90,7 @@ def identity_rot_mats(
 return rots


-@lru_cache(maxsize=None)
+@cache
 def identity_trans(
 batch_dims: tuple[int, ...],
 dtype: Optional[torch.dtype] = None,
@@ -101,7 +101,7 @@ def identity_trans(
 return trans


-@lru_cache(maxsize=None)
+@cache
 def identity_quats(
 batch_dims: tuple[int, ...],
 dtype: Optional[torch.dtype] = None,
@@ -220,7 +220,7 @@ _CACHED_QUATS: dict[str, np.ndarray] = {
 }


-@lru_cache(maxsize=None)
+@cache
 def _get_quat(quat_key: str, dtype: torch.dtype, device: torch.device) -> torch.Tensor:
 return torch.tensor(_CACHED_QUATS[quat_key], dtype=dtype, device=device)

@@ -1070,7 +1070,7 @@ class Rigid:
 e0 = [c / denom for c in e0]
 dot = sum((c1 * c2 for c1, c2 in zip(e0, e1)))
 e1 = [c2 - c1 * dot for c1, c2 in zip(e0, e1)]
-denom = torch.sqrt(sum((c * c for c in e1)) + eps * torch.ones_like(e1[0]))
+denom = torch.sqrt(sum(c * c for c in e1) + eps * torch.ones_like(e1[0]))
 e1 = [c / denom for c in e1]
 e2 = [
 e0[1] * e1[2] - e0[2] * e1[1],
@@ -949,7 +949,7 @@ class FalconH1Mixer(nn.Module):

 # 2. Compute the state for each intra-chunk
 # (right term of low-rank factorization of off-diagonal blocks; B terms)
-decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
 B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
 states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

@@ -748,7 +748,7 @@ class FalconH1Mixer(nn.Module):

 # 2. Compute the state for each intra-chunk
 # (right term of low-rank factorization of off-diagonal blocks; B terms)
-decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
 B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
 states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

@@ -318,7 +318,7 @@ class FlavaImageProcessor(BaseImageProcessor):
 image_processor_dict["codebook_crop_size"] = kwargs.pop("codebook_crop_size")
 return super().from_dict(image_processor_dict, **kwargs)

-@lru_cache()
+@lru_cache
 def masking_generator(
 self,
 input_size_patches,
@@ -273,7 +273,7 @@ class FlavaImageProcessorFast(BaseImageProcessorFast):
 image_processor_dict["codebook_crop_size"] = kwargs.pop("codebook_crop_size")
 return super().from_dict(image_processor_dict, **kwargs)

-@lru_cache()
+@lru_cache
 def masking_generator(
 self,
 input_size_patches,
@@ -1446,7 +1446,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
 param.requires_grad = False

 def get_codebook_indices(self, pixel_values: torch.Tensor) -> torch.Tensor:
-"""
+f"""
 Args:
 pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
 Pixel values. Codebook pixel values can be obtained using [`AutoImageProcessor`] by passing
@@ -1458,8 +1458,8 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
 >>> import requests
 >>> from transformers import AutoImageProcessor, FlavaImageCodebook

->>> model = FlavaImageCodebook.from_pretrained("{0}")
->>> image_processor = AutoImageProcessor.from_pretrained("{0}")
+>>> model = FlavaImageCodebook.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")
+>>> image_processor = AutoImageProcessor.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")

 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
@@ -1469,7 +1469,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):

 >>> outputs = model.get_codebook_indices(**inputs)
 ```
-""".format(_CHECKPOINT_FOR_CODEBOOK_DOC)
+"""
 z_logits = self.blocks(pixel_values)
 return torch.argmax(z_logits, axis=1)

@@ -1478,7 +1478,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
 return nn.Softmax(dim=1)(z_logits)

 def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
-"""
+f"""
 Args:
 pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
 Pixel values. Codebook pixel values can be obtained using [`AutoImageProcessor`] by passing
@@ -1491,8 +1491,8 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
 >>> import requests
 >>> from transformers import AutoImageProcessor, FlavaImageCodebook

->>> model = FlavaImageCodebook.from_pretrained("{0}")
->>> image_processor = AutoImageProcessor.from_pretrained("{0}")
+>>> model = FlavaImageCodebook.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")
+>>> image_processor = AutoImageProcessor.from_pretrained("{_CHECKPOINT_FOR_CODEBOOK_DOC}")

 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
@@ -1504,7 +1504,7 @@ class FlavaImageCodebook(FlavaPreTrainedModel):
 >>> print(outputs.shape)
 (1, 196)
 ```
-""".format(_CHECKPOINT_FOR_CODEBOOK_DOC)
+"""
 if len(pixel_values.shape) != 4:
 raise ValueError(f"input shape {pixel_values.shape} is not 4d")
 if pixel_values.shape[1] != self.input_channels:
@@ -177,7 +177,7 @@ class FNetTokenizer(PreTrainedTokenizer):
 pieces = self.sp_model.encode(text, out_type=str)
 new_pieces = []
 for piece in pieces:
-if len(piece) > 1 and piece[-1] == str(",") and piece[-2].isdigit():
+if len(piece) > 1 and piece[-1] == "," and piece[-2].isdigit():
 cur_pieces = self.sp_model.EncodeAsPieces(piece[:-1].replace(SPIECE_UNDERLINE, ""))
 if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
 if len(cur_pieces[0]) == 1:
@@ -293,7 +293,7 @@ class FocalNetDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class FocalNetModulation(nn.Module):
@@ -431,8 +431,8 @@ class FocalNetLayer(nn.Module):
 self.gamma_1 = 1.0
 self.gamma_2 = 1.0
 if config.use_layerscale:
-self.gamma_1 = nn.Parameter(config.layerscale_value * torch.ones((dim)), requires_grad=True)
-self.gamma_2 = nn.Parameter(config.layerscale_value * torch.ones((dim)), requires_grad=True)
+self.gamma_1 = nn.Parameter(config.layerscale_value * torch.ones(dim), requires_grad=True)
+self.gamma_2 = nn.Parameter(config.layerscale_value * torch.ones(dim), requires_grad=True)

 def forward(self, hidden_state, input_dimensions):
 height, width = input_dimensions
@@ -65,7 +65,7 @@ class GLPNDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 # Copied from transformers.models.segformer.modeling_segformer.SegformerOverlapPatchEmbeddings
@@ -33,7 +33,7 @@ VOCAB_FILES_NAMES = {
 }


-@lru_cache()
+@lru_cache
 def bytes_to_unicode():
 """
 Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control
@@ -757,7 +757,7 @@ class GraniteMoeHybridMambaLayer(nn.Module):

 # 2. Compute the state for each intra-chunk
 # (right term of low-rank factorization of off-diagonal blocks; B terms)
-decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
+decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
 B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
 states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

@@ -921,7 +921,7 @@ class GroundingDinoDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class GroundingDinoFusionLayer(nn.Module):
@@ -937,8 +937,8 @@ class GroundingDinoFusionLayer(nn.Module):
 # add layer scale for training stability
 self.drop_path = GroundingDinoDropPath(drop_path) if drop_path > 0.0 else nn.Identity()
 init_values = 1e-4
-self.vision_param = nn.Parameter(init_values * torch.ones((config.d_model)), requires_grad=True)
-self.text_param = nn.Parameter(init_values * torch.ones((config.d_model)), requires_grad=True)
+self.vision_param = nn.Parameter(init_values * torch.ones(config.d_model), requires_grad=True)
+self.text_param = nn.Parameter(init_values * torch.ones(config.d_model), requires_grad=True)

 def forward(
 self,
@@ -459,7 +459,7 @@ class HieraDropPath(nn.Module):
 return drop_path(hidden_states, self.drop_prob, self.training)

 def extra_repr(self) -> str:
-return "p={}".format(self.drop_prob)
+return f"p={self.drop_prob}"


 class HieraMlp(nn.Module):
@@ -203,7 +203,7 @@ def convert_hubert_checkpoint(
 config.vocab_size = len(target_dict.symbols)
 vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
 if not os.path.isdir(pytorch_dump_folder_path):
-logger.error("--pytorch_dump_folder_path ({}) should be a directory".format(pytorch_dump_folder_path))
+logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory")
 return
 os.makedirs(pytorch_dump_folder_path, exist_ok=True)
 with open(vocab_path, "w", encoding="utf-8") as vocab_handle:
@@ -300,12 +300,7 @@ class IdeficsDecoupledEmbedding(nn.Embedding):
 return full_vector

 def extra_repr(self) -> str:
-return "num_embeddings={}, num_additional_embeddings={}, embedding_dim={}, partially_freeze={}".format(
-self.num_embeddings,
-self.num_additional_embeddings,
-self.embedding_dim,
-self.partially_freeze,
-)
+return f"num_embeddings={self.num_embeddings}, num_additional_embeddings={self.num_additional_embeddings}, embedding_dim={self.embedding_dim}, partially_freeze={self.partially_freeze}"


 class IdeficsDecoupledLinear(nn.Linear):
@@ -364,13 +359,7 @@ class IdeficsDecoupledLinear(nn.Linear):

 def extra_repr(self) -> str:
 """Overwriting `nn.Linear.extra_repr` to include new parameters."""
-return "in_features={}, out_features={}, out_additional_features={}, bias={}, partially_freeze={}".format(
-self.in_features,
-self.out_features,
-self.out_additional_features,
-self.bias is not None,
-self.partially_freeze,
-)
+return f"in_features={self.in_features}, out_features={self.out_features}, out_additional_features={self.out_additional_features}, bias={self.bias is not None}, partially_freeze={self.partially_freeze}"


 # this was adapted from LlamaRMSNorm
@@ -362,12 +362,7 @@ class TFIdeficsDecoupledEmbedding(tf.keras.layers.Embedding):
 return full_vector

 def extra_repr(self) -> str:
-return "num_embeddings={}, num_additional_embeddings={}, embedding_dim={}, partially_freeze={}".format(
-self.num_embeddings,
-self.num_additional_embeddings,
-self.output_dim,
-self.partially_freeze,
-)
+return f"num_embeddings={self.num_embeddings}, num_additional_embeddings={self.num_additional_embeddings}, embedding_dim={self.output_dim}, partially_freeze={self.partially_freeze}"


 class TFIdeficsDecoupledLinear(tf.keras.layers.Layer):
@@ -431,13 +426,7 @@ class TFIdeficsDecoupledLinear(tf.keras.layers.Layer):

 def extra_repr(self) -> str:
 """Overwriting `nn.Linear.extra_repr` to include new parameters."""
-return "in_features={}, out_features={}, out_additional_features={}, bias={}, partially_freeze={}".format(
-self.in_features,
-self.out_features,
-self.out_additional_features,
-self.bias is not None,
-self.partially_freeze,
-)
+return f"in_features={self.in_features}, out_features={self.out_features}, out_additional_features={self.out_additional_features}, bias={self.bias is not None}, partially_freeze={self.partially_freeze}"

 @classmethod
 def from_config(cls, config):
@@ -60,14 +60,14 @@ def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path):
 )
 raise
 tf_path = os.path.abspath(imagegpt_checkpoint_path)
-logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
+logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
 # Load weights from TF model
 init_vars = tf.train.list_variables(tf_path)
 names = []
 arrays = []

 for name, shape in init_vars:
-logger.info("Loading TF weight {} with shape {}".format(name, shape))
+logger.info(f"Loading TF weight {name} with shape {shape}")
 array = tf.train.load_variable(tf_path, name)
 names.append(name)
 arrays.append(array.squeeze())
@@ -129,7 +129,7 @@ def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path):
 e.args += (pointer.shape, array.shape)
 raise

-logger.info("Initialize PyTorch weight {}".format(name))
+logger.info(f"Initialize PyTorch weight {name}")

 if name[-1] == "q_proj":
 pointer.data[:, : config.n_embd] = torch.from_numpy(array.reshape(config.n_embd, config.n_embd)).T
@@ -397,8 +397,8 @@ class InternVLVisionLayer(nn.Module):
 self.layernorm_after = NORM2FN[config.norm_type](config.hidden_size, eps=config.layer_norm_eps)

 init_values = config.layer_scale_init_value
-self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
-self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
+self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
+self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
 self.dropout = nn.Dropout(config.hidden_dropout_prob)

 def forward(
@@ -348,8 +348,8 @@ class InternVLVisionLayer(nn.Module):
 self.layernorm_after = NORM2FN[config.norm_type](config.hidden_size, eps=config.layer_norm_eps)

 init_values = config.layer_scale_init_value
-self.lambda_1 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
-self.lambda_2 = nn.Parameter(init_values * torch.ones((config.hidden_size)), requires_grad=True)
+self.lambda_1 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
+self.lambda_2 = nn.Parameter(init_values * torch.ones(config.hidden_size), requires_grad=True)
 self.dropout = nn.Dropout(config.hidden_dropout_prob)

 def forward(
@@ -140,7 +140,7 @@ LAYOUTLMV3_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
 """


-@lru_cache()
+@lru_cache
 # Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
 def bytes_to_unicode():
 """
@@ -34,7 +34,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
 # See all LED models at https://huggingface.co/models?filter=LED


-@lru_cache()
+@lru_cache
 # Copied from transformers.models.bart.tokenization_bart.bytes_to_unicode
 def bytes_to_unicode():
 """
@@ -797,22 +797,18 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
 # Remove image pairs that have been early stopped from the forward pass
 num_points_per_pair = num_points_per_pair[~early_stopped_pairs]
 descriptors, keypoints_0, keypoint_1, mask, image_indices = tuple(
-(
-tensor[~early_stops]
-for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
-)
+tensor[~early_stops]
+for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
 )
 keypoints = (keypoints_0, keypoint_1)
 if do_keypoint_pruning:
 pruned_keypoints_indices, pruned_keypoints_iterations, keypoint_confidences = tuple(
-(
-tensor[~early_stops]
-for tensor in [
-pruned_keypoints_indices,
-pruned_keypoints_iterations,
-keypoint_confidences,
-]
-)
+tensor[~early_stops]
+for tensor in [
+pruned_keypoints_indices,
+pruned_keypoints_iterations,
+keypoint_confidences,
+]
 )
 # If all pairs of images are early stopped, we stop the forward pass through the transformer
 # layers for all pairs of images.
@@ -871,22 +871,18 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
 # Remove image pairs that have been early stopped from the forward pass
 num_points_per_pair = num_points_per_pair[~early_stopped_pairs]
 descriptors, keypoints_0, keypoint_1, mask, image_indices = tuple(
-(
-tensor[~early_stops]
-for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
-)
+tensor[~early_stops]
+for tensor in [descriptors, keypoints[0], keypoints[1], mask, image_indices]
 )
 keypoints = (keypoints_0, keypoint_1)
 if do_keypoint_pruning:
 pruned_keypoints_indices, pruned_keypoints_iterations, keypoint_confidences = tuple(
-(
-tensor[~early_stops]
-for tensor in [
-pruned_keypoints_indices,
-pruned_keypoints_iterations,
-keypoint_confidences,
-]
-)
+tensor[~early_stops]
+for tensor in [
+pruned_keypoints_indices,
+pruned_keypoints_iterations,
+keypoint_confidences,
+]
 )
 # If all pairs of images are early stopped, we stop the forward pass through the transformer
 # layers for all pairs of images.
@@ -161,13 +161,11 @@ def convert_llava_llama_to_hf(text_model_id, vision_model_id, output_hub_path, o
 vocab_size = config.text_config.vocab_size
 model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
 model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
-tuple(
-(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]))
-),
+tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])),
 dim=0,
 )
 model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
-tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
+tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
 dim=0,
 )

@@ -175,15 +175,12 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
 model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape)
 model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
 tuple(
-(
-dist.sample()
-for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])
-)
+dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])
 ),
 dim=0,
 )
 model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
-tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
+tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
 dim=0,
 )

@@ -227,13 +227,11 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
 num_tokens = vocab_size + 3
 model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape)
 model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
-tuple(
-(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]))
-),
+tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])),
 dim=0,
 )
 model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
-tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
+tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
 dim=0,
 )

@@ -176,13 +176,11 @@ def convert_llava_to_hf(model_id, pytorch_dump_folder_path, push_to_hub=False):
 num_tokens = vocab_size + 2
 model.resize_token_embeddings(num_tokens, pad_to_multiple_of=pad_shape)
 model.language_model.model.embed_tokens.weight.data[vocab_size:] = torch.stack(
-tuple(
-(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0]))
-),
+tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[vocab_size:].shape[0])),
 dim=0,
 )
 model.language_model.lm_head.weight.data[vocab_size:] = torch.stack(
-tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0]))),
+tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[vocab_size:].shape[0])),
 dim=0,
 )

@ -30,7 +30,7 @@ logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}


@lru_cache()
@lru_cache
# Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
def bytes_to_unicode():
"""
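Aside (illustrative, not part of the diff): since Python 3.8, functools.lru_cache can be applied as a bare decorator, so @lru_cache and @lru_cache() are equivalent when no maxsize/typed arguments are given. A minimal sketch:

from functools import lru_cache

@lru_cache  # bare form; same default maxsize=128 as @lru_cache()
def squared(n: int) -> int:
    return n * n

assert squared(4) == 16
assert squared.cache_info().misses == 1  # the call above populated the cache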
@ -127,7 +127,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
raise ValueError

# Finally, save our PyTorch model and tokenizer
print("Saving PyTorch model to {}".format(pytorch_dump_folder_path))
print(f"Saving PyTorch model to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
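Aside (illustrative, not part of the diff): the str.format call above is rewritten as an f-string; both render the same message. A tiny sketch with a hypothetical path:

pytorch_dump_folder_path = "/tmp/luke-dump"  # hypothetical example path

old_style = "Saving PyTorch model to {}".format(pytorch_dump_folder_path)
new_style = f"Saving PyTorch model to {pytorch_dump_folder_path}"

assert old_style == new_style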
@ -130,7 +130,7 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
"""


@lru_cache()
@lru_cache
# Copied from transformers.models.roberta.tokenization_roberta.bytes_to_unicode
def bytes_to_unicode():
"""

@ -606,7 +606,7 @@ class Mamba2Mixer(nn.Module):

# 2. Compute the state for each intra-chunk
# (right term of low-rank factorization of off-diagonal blocks; B terms)
decay_states = torch.exp((A_cumsum[:, :, :, -1:] - A_cumsum))
decay_states = torch.exp(A_cumsum[:, :, :, -1:] - A_cumsum)
B_decay = B * decay_states.permute(0, -2, -1, 1)[..., None]
states = (B_decay[..., None, :] * hidden_states[..., None]).sum(dim=2)

@ -89,7 +89,7 @@ MARKUPLM_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
"""


@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

@ -44,7 +44,7 @@ logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}


@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

@ -1247,8 +1247,8 @@ class Mask2FormerPixelDecoder(nn.Module):
nn.GroupNorm(32, feature_dim),
nn.ReLU(),
)
self.add_module("adapter_{}".format(idx + 1), lateral_conv)
self.add_module("layer_{}".format(idx + 1), output_conv)
self.add_module(f"adapter_{idx + 1}", lateral_conv)
self.add_module(f"layer_{idx + 1}", output_conv)

lateral_convs.append(lateral_conv)
output_convs.append(output_conv)

@ -333,7 +333,7 @@ class MaskFormerSwinDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)

def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"


# Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->MaskFormerSwin
@ -556,7 +556,7 @@ class MaskFormerSwinLayer(nn.Module):
mask_windows = window_partition(img_mask, self.window_size)
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
else:
attn_mask = None
return attn_mask
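Aside (illustrative, not part of the diff): wrapping the fill values in float() was a no-op, since -100.0 and 0.0 are already float literals, so masked_fill produces the same mask either way. A small sketch with a toy window mask:

import torch

# Toy attention-mask construction; values are arbitrary.
mask_windows = torch.tensor([[0.0, 0.0, 1.0], [0.0, 1.0, 1.0]])
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)

with_cast = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
without_cast = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)

assert torch.equal(with_cast, without_cast)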
@ -62,7 +62,7 @@ def load_tf_weights_in_megatron_bert(model, config, tf_checkpoint_path):
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []

@ -112,7 +112,7 @@ def load_tf_weights_in_megatron_bert(model, config, tf_checkpoint_path):
array = np.transpose(array)
if pointer.shape != array.shape:
raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
logger.info("Initialize PyTorch weight {}".format(name))
logger.info(f"Initialize PyTorch weight {name}")
pointer.data = torch.from_numpy(array)
return model
@ -65,7 +65,7 @@ class MgpstrDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)

def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"


@dataclass

@ -89,7 +89,7 @@ class MgpstrTokenizer(PreTrainedTokenizer):

def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str]:
if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
return
vocab_file = os.path.join(
save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]

@ -487,7 +487,7 @@ def to_channel_dimension_format(
elif target_channel_dim == ChannelDimension.LAST:
image = image.transpose((1, 2, 0))
else:
raise ValueError("Unsupported channel dimension format: {}".format(channel_dim))
raise ValueError(f"Unsupported channel dimension format: {channel_dim}")

return image
@ -179,7 +179,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
assert [e for e in multilingual_predicted_entities if e.startswith("en:")][0] == "en:Japan"

# Finally, save our PyTorch model and tokenizer
print("Saving PyTorch model to {}".format(pytorch_dump_folder_path))
print(f"Saving PyTorch model to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)

@ -60,7 +60,7 @@ def load_orig_config_file(orig_cfg_file):
for k, v in flat_cfg.items():
setattr(config, k, v)
except yaml.YAMLError as exc:
logger.error("Error while loading config file: {}. Error message: {}".format(orig_cfg_file, str(exc)))
logger.error(f"Error while loading config file: {orig_cfg_file}. Error message: {str(exc)}")
return config

@ -32,7 +32,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
# See all MVP models at https://huggingface.co/models?filter=mvp


@lru_cache()
@lru_cache
def bytes_to_unicode():
"""
Returns list of utf-8 byte and a mapping to unicode strings. We specifically avoids mapping to whitespace/control

@ -1142,7 +1142,7 @@ def get_class_similarity(class_distance_type, cls_feature, class_proj):
elif class_distance_type == "dot":
class_logits = torch.bmm(cls_feature, class_proj)
else:
raise Exception("Unknown class_distance_type {}".format(class_distance_type))
raise Exception(f"Unknown class_distance_type {class_distance_type}")
return class_logits

@ -1345,8 +1345,8 @@ class OneFormerPixelDecoder(nn.Module):
nn.GroupNorm(32, config.conv_dim),
nn.ReLU(),
)
self.add_module("adapter_{}".format(idx + 1), lateral_conv)
self.add_module("layer_{}".format(idx + 1), output_conv)
self.add_module(f"adapter_{idx + 1}", lateral_conv)
self.add_module(f"layer_{idx + 1}", output_conv)

lateral_convs.append(lateral_conv)
output_convs.append(output_conv)
@ -346,13 +346,11 @@ def convert_paligemma2_checkpoint(
# We add an image token so we resize the model
model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
model.language_model.model.embed_tokens.weight.data[257152:] = torch.stack(
tuple(
(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0]))
),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[257152:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0])),
dim=0,
)
# convert to needed precision

@ -279,11 +279,11 @@ def convert_paligemma_checkpoint(
# We add an image token so we resize the model
model.resize_token_embeddings(config.text_config.vocab_size + 2, pad_shape)
model.language_model.model.embed_tokens.weight.data[257152:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.model.embed_tokens.weight.data[257152:].shape[0])),
dim=0,
)
model.language_model.lm_head.weight.data[257152:] = torch.stack(
tuple((dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0]))),
tuple(dist.sample() for _ in range(model.language_model.lm_head.weight.data[257152:].shape[0])),
dim=0,
)

@ -139,7 +139,7 @@ def convert_pix2struct_original_pytorch_checkpoint_to_hf(
model.save_pretrained(pytorch_dump_folder_path)
processor.save_pretrained(pytorch_dump_folder_path)

print("Model saved in {}".format(pytorch_dump_folder_path))
print(f"Model saved in {pytorch_dump_folder_path}")


if __name__ == "__main__":
@ -185,7 +185,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
else:
scale_size = (int(size["height"] / crop_pct), int(size["width"] / crop_pct))
else:
raise ValueError("Invalid size for resize: {}".format(size))
raise ValueError(f"Invalid size for resize: {size}")

output_size = get_resize_output_image_size(
image, size=scale_size, default_to_square=False, input_data_format=input_data_format

@ -198,7 +198,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
elif "height" in size and "width" in size:
output_size = (size["height"], size["width"])
else:
raise ValueError("Invalid size for resize: {}".format(size))
raise ValueError(f"Invalid size for resize: {size}")

return resize(
image,

@ -136,7 +136,7 @@ class PoolFormerImageProcessorFast(BaseImageProcessorFast):
else:
scale_size = (int(size.height / crop_pct), int(size.width / crop_pct))
else:
raise ValueError("Invalid size for resize: {}".format(size))
raise ValueError(f"Invalid size for resize: {size}")

new_size = get_resize_output_image_size(
image,

@ -65,7 +65,7 @@ class PoolFormerDropPath(nn.Module):
return drop_path(hidden_states, self.drop_prob, self.training)

def extra_repr(self) -> str:
return "p={}".format(self.drop_prob)
return f"p={self.drop_prob}"


class PoolFormerEmbeddings(nn.Module):
@ -142,10 +142,10 @@ class PoolFormerLayer(nn.Module):
self.use_layer_scale = config.use_layer_scale
if config.use_layer_scale:
self.layer_scale_1 = nn.Parameter(
config.layer_scale_init_value * torch.ones((num_channels)), requires_grad=True
config.layer_scale_init_value * torch.ones(num_channels), requires_grad=True
)
self.layer_scale_2 = nn.Parameter(
config.layer_scale_init_value * torch.ones((num_channels)), requires_grad=True
config.layer_scale_init_value * torch.ones(num_channels), requires_grad=True
)

def forward(self, hidden_states):
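Aside (illustrative, not part of the diff): torch.ones((num_channels)) and torch.ones(num_channels) are the same call, because (num_channels) is just a parenthesized integer, not a tuple. A minimal sketch with an assumed channel count:

import torch

num_channels = 64  # illustrative value

a = torch.ones((num_channels))  # redundant parentheses around a single int
b = torch.ones(num_channels)

assert torch.equal(a, b) and a.shape == (num_channels,)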
@ -404,7 +404,7 @@ class Pop2PianoTokenizer(PreTrainedTokenizer):
notes = np.round(notes).astype(np.int32)
max_time_idx = notes[:, :2].max()

times = [[] for i in range((max_time_idx + 1))]
times = [[] for i in range(max_time_idx + 1)]
for onset, offset, pitch, velocity in notes:
times[onset].append([pitch, velocity])
times[offset].append([pitch, 0])
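Aside (illustrative, not part of the diff): range((max_time_idx + 1)) and range(max_time_idx + 1) are identical; the inner parentheses only group the expression. The surrounding loop buckets note events by time index, roughly as in this self-contained sketch with toy data:

import numpy as np

# Toy (onset, offset, pitch, velocity) rows, not Pop2Piano's real output.
notes = np.array([[0, 2, 60, 90], [1, 3, 64, 80]], dtype=np.int32)
max_time_idx = notes[:, :2].max()

times = [[] for _ in range(max_time_idx + 1)]  # one bucket per time step
for onset, offset, pitch, velocity in notes:
    times[onset].append([pitch, velocity])  # note-on event
    times[offset].append([pitch, 0])  # note-off event (velocity 0)

assert len(times) == 4 and times[2] == [[60, 0]]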
Some files were not shown because too many files have changed in this diff.