Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-03 12:50:06 +06:00)
Post-PR fixes! (#38868)
* Post-PR fixes!
* make fix-copies
This commit is contained in:
parent 508a704055
commit d058f81e5b
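The diff below (partly produced by `make fix-copies`) is mostly a mechanical typing cleanup: the legacy `typing.List`/`Dict`/`Tuple` aliases are replaced by the builtin generics `list`/`dict`/`tuple` (PEP 585) across the LightGlue conversion script, image processor, modeling code, and modular definition, and the now-unused `typing` imports are trimmed. A minimal sketch of the before/after pattern, using an illustrative helper that is not taken from the diff:

```python
from typing import Optional

# After the change, container annotations use the builtin generics directly (PEP 585),
# so `from typing import Dict, List, Tuple` is no longer needed for them.
def normalize_size(size: Optional[dict[str, int]] = None) -> tuple[int, int]:
    """Illustrative helper (not part of the diff): return (height, width) from a size dict."""
    size = size if size is not None else {"height": 480, "width": 640}
    return size["height"], size["width"]


print(normalize_size())  # (480, 640)
```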
@@ -15,7 +15,6 @@ import argparse
 import gc
 import os
 import re
-from typing import List

 import torch
 from datasets import load_dataset
@@ -90,7 +89,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = {
 }


-def convert_old_keys_to_new_keys(state_dict_keys: List[str]):
+def convert_old_keys_to_new_keys(state_dict_keys: list[str]):
     """
     This function should be applied only once, on the concatenated keys to efficiently rename using
     the key mappings.
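Conversion scripts in transformers commonly implement this helper by joining all checkpoint keys into one newline-separated string and applying each regex substitution once over it, which is why the docstring says it should be applied only once on the concatenated keys. A rough sketch of that pattern, assuming a placeholder mapping (the entries below are illustrative, not the real `ORIGINAL_TO_CONVERTED_KEY_MAPPING`):

```python
import re

# Placeholder mapping from regex pattern to replacement -- illustrative entries only.
KEY_MAPPING = {
    r"^backbone\.": "keypoint_detector.",
    r"self_attn\.merge\.": "self_attn.o_proj.",
}


def convert_old_keys_to_new_keys(state_dict_keys: list[str]) -> dict[str, str]:
    """Rename checkpoint keys in one pass by running every regex over the concatenated key list."""
    old_text = "\n".join(state_dict_keys)
    new_text = old_text
    for pattern, replacement in KEY_MAPPING.items():
        new_text = re.sub(pattern, replacement, new_text, flags=re.MULTILINE)
    return dict(zip(state_dict_keys, new_text.split("\n")))


print(convert_old_keys_to_new_keys(["backbone.conv1.weight"]))
# {'backbone.conv1.weight': 'keypoint_detector.conv1.weight'}
```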
@@ -17,7 +17,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Optional, Union

 import numpy as np
 import torch
@@ -139,7 +139,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
         do_resize (`bool`, *optional*, defaults to `True`):
             Controls whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden
             by `do_resize` in the `preprocess` method.
-        size (`Dict[str, int]` *optional*, defaults to `{"height": 480, "width": 640}`):
+        size (`dict[str, int]` *optional*, defaults to `{"height": 480, "width": 640}`):
             Resolution of the output image after `resize` is applied. Only has an effect if `do_resize` is set to
             `True`. Can be overridden by `size` in the `preprocess` method.
         resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
@@ -159,7 +159,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
     def __init__(
         self,
         do_resize: bool = True,
-        size: Optional[Dict[str, int]] = None,
+        size: Optional[dict[str, int]] = None,
         resample: PILImageResampling = PILImageResampling.BILINEAR,
         do_rescale: bool = True,
         rescale_factor: float = 1 / 255,
@@ -180,7 +180,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
     def resize(
         self,
         image: np.ndarray,
-        size: Dict[str, int],
+        size: dict[str, int],
         data_format: Optional[Union[str, ChannelDimension]] = None,
         input_data_format: Optional[Union[str, ChannelDimension]] = None,
         **kwargs,
@@ -191,7 +191,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
         Args:
             image (`np.ndarray`):
                 Image to resize.
-            size (`Dict[str, int]`):
+            size (`dict[str, int]`):
                 Dictionary of the form `{"height": int, "width": int}`, specifying the size of the output image.
             data_format (`ChannelDimension` or `str`, *optional*):
                 The channel dimension format of the output image. If not provided, it will be inferred from the input
@@ -220,7 +220,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
         self,
         images,
         do_resize: Optional[bool] = None,
-        size: Optional[Dict[str, int]] = None,
+        size: Optional[dict[str, int]] = None,
         resample: PILImageResampling = None,
         do_rescale: Optional[bool] = None,
         rescale_factor: Optional[float] = None,
@@ -240,7 +240,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
                 `do_rescale=False`.
             do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                 Whether to resize the image.
-            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
+            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                 Size of the output image after `resize` has been applied. If `size["shortest_edge"]` >= 384, the image
                 is resized to `(size["shortest_edge"], size["shortest_edge"])`. Otherwise, the smaller edge of the
                 image will be matched to `int(size["shortest_edge"]/ crop_pct)`, after which the image is cropped to
@@ -337,23 +337,23 @@ class LightGlueImageProcessor(BaseImageProcessor):
     def post_process_keypoint_matching(
         self,
         outputs: LightGlueKeypointMatchingOutput,
-        target_sizes: Union[TensorType, List[Tuple]],
+        target_sizes: Union[TensorType, list[tuple]],
         threshold: float = 0.0,
-    ) -> List[Dict[str, torch.Tensor]]:
+    ) -> list[dict[str, torch.Tensor]]:
         """
         Converts the raw output of [`KeypointMatchingOutput`] into lists of keypoints, scores and descriptors
         with coordinates absolute to the original image sizes.
         Args:
             outputs ([`KeypointMatchingOutput`]):
                 Raw outputs of the model.
-            target_sizes (`torch.Tensor` or `List[Tuple[Tuple[int, int]]]`, *optional*):
-                Tensor of shape `(batch_size, 2, 2)` or list of tuples of tuples (`Tuple[int, int]`) containing the
+            target_sizes (`torch.Tensor` or `list[tuple[tuple[int, int]]]`, *optional*):
+                Tensor of shape `(batch_size, 2, 2)` or list of tuples of tuples (`tuple[int, int]`) containing the
                 target size `(height, width)` of each image in the batch. This must be the original image size (before
                 any processing).
             threshold (`float`, *optional*, defaults to 0.0):
                 Threshold to filter out the matches with low scores.
         Returns:
-            `List[Dict]`: A list of dictionaries, each dictionary containing the keypoints in the first and second image
+            `list[Dict]`: A list of dictionaries, each dictionary containing the keypoints in the first and second image
             of the pair, the matching scores and the matching indices.
         """
         if outputs.mask.shape[0] != len(target_sizes):
@@ -361,7 +361,7 @@ class LightGlueImageProcessor(BaseImageProcessor):
         if not all(len(target_size) == 2 for target_size in target_sizes):
             raise ValueError("Each element of target_sizes must contain the size (h, w) of each image of the batch")

-        if isinstance(target_sizes, List):
+        if isinstance(target_sizes, list):
             image_pair_sizes = torch.tensor(target_sizes, device=outputs.mask.device)
         else:
             if target_sizes.shape[1] != 2 or target_sizes.shape[2] != 2:
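For orientation, the post-processing method documented above is typically called on the image processor after a forward pass. A short usage sketch; the checkpoint name and threshold here are assumptions for illustration, not taken from the diff:

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, LightGlueForKeypointMatching

# Assumed checkpoint name for illustration; substitute the LightGlue checkpoint you actually use.
checkpoint = "ETH-CVG/lightglue_superpoint"
processor = AutoImageProcessor.from_pretrained(checkpoint)
model = LightGlueForKeypointMatching.from_pretrained(checkpoint)

image0 = Image.open("image_a.jpg")
image1 = Image.open("image_b.jpg")

# The processor consumes pairs of images; a single pair here.
inputs = processor([[image0, image1]], return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# target_sizes must hold the original (height, width) of each image in every pair.
target_sizes = [[(image0.height, image0.width), (image1.height, image1.width)]]
matches = processor.post_process_keypoint_matching(outputs, target_sizes, threshold=0.2)
print(matches[0].keys())  # per-pair keypoints, matching scores and matching indices
```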
@@ -18,7 +18,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from dataclasses import dataclass
-from typing import Callable, Optional, Tuple, Union
+from typing import Callable, Optional, Union

 import numpy as np
 import torch
@@ -74,8 +74,8 @@ class LightGlueKeypointMatchingOutput(ModelOutput):
     keypoints: Optional[torch.FloatTensor] = None
     prune: Optional[torch.IntTensor] = None
     mask: Optional[torch.FloatTensor] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[tuple[torch.FloatTensor]] = None
+    attentions: Optional[tuple[torch.FloatTensor]] = None


 class LightGluePositionalEncoder(nn.Module):
@@ -85,7 +85,7 @@ class LightGluePositionalEncoder(nn.Module):

     def forward(
         self, keypoints: torch.Tensor, output_hidden_states: Optional[bool] = False
-    ) -> Union[Tuple[torch.Tensor], Tuple[torch.Tensor, torch.Tensor]]:
+    ) -> Union[tuple[torch.Tensor], tuple[torch.Tensor, torch.Tensor]]:
         projected_keypoints = self.projector(keypoints)
         embeddings = projected_keypoints.repeat_interleave(2, dim=-1)
         cosines = torch.cos(embeddings)
@@ -200,12 +200,12 @@ class LightGlueAttention(nn.Module):
     def forward(
         self,
         hidden_states: torch.Tensor,
-        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
         attention_mask: Optional[torch.Tensor] = None,
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.Tensor] = None,
         **kwargs: Unpack[FlashAttentionKwargs],
-    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
         hidden_shape = (*input_shape, -1, self.head_dim)

@@ -274,7 +274,7 @@ class LightGlueTransformerLayer(nn.Module):
         attention_mask: torch.Tensor,
         output_hidden_states: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
-    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], Optional[Tuple[torch.Tensor]]]:
+    ) -> tuple[torch.Tensor, Optional[tuple[torch.Tensor]], Optional[tuple[torch.Tensor]]]:
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None

@@ -435,7 +435,7 @@ class LightGluePreTrainedModel(PreTrainedModel):
             module.weight.data.fill_(1.0)


-def get_matches_from_scores(scores: torch.Tensor, threshold: float) -> Tuple[torch.Tensor, torch.Tensor]:
+def get_matches_from_scores(scores: torch.Tensor, threshold: float) -> tuple[torch.Tensor, torch.Tensor]:
     """obtain matches from a score matrix [Bx M+1 x N+1]"""
     batch_size, _, _ = scores.shape
     # For each keypoint, get the best match
@@ -548,7 +548,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):

     def _keypoint_processing(
         self, descriptors: torch.Tensor, keypoints: torch.Tensor, output_hidden_states: Optional[bool] = False
-    ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    ) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         descriptors = descriptors.detach().contiguous()
         projected_descriptors = self.input_projection(descriptors)
         keypoint_encoding_output = self.positional_encoder(keypoints, output_hidden_states=output_hidden_states)
@@ -659,7 +659,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
         matches: torch.Tensor,
         matching_scores: torch.Tensor,
         num_keypoints: torch.Tensor,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         # (batch_size, num_keypoints) -> (batch_size // 2, 2, num_keypoints) -> 2 * (batch_size // 2, num_keypoints) to
         # have tensors from
         batch_size, _ = indices.shape
@@ -699,7 +699,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
         mask: torch.Tensor = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Tuple, Tuple]:
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, tuple, tuple]:
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None

@@ -875,7 +875,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
         labels: Optional[torch.LongTensor] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
-    ) -> Union[Tuple, LightGlueKeypointMatchingOutput]:
+    ) -> Union[tuple, LightGlueKeypointMatchingOutput]:
         loss = None
         if labels is not None:
             raise ValueError("LightGlue is not trainable, no labels should be provided.")
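The `get_matches_from_scores` helper touched above is where matches are read out of the `[B x M+1 x N+1]` score matrix (the extra row and column typically act as "no match" bins). As a simplified re-implementation for intuition only, not the library code, mutual best matches above a threshold could be extracted like this:

```python
import torch


def mutual_matches(scores: torch.Tensor, threshold: float) -> tuple[torch.Tensor, torch.Tensor]:
    """Simplified sketch: mutual nearest-neighbour matching on a (batch, M+1, N+1) score matrix."""
    inner = scores[:, :-1, :-1]   # drop the "unmatched" row and column
    best0 = inner.argmax(dim=2)   # preferred keypoint in image 1 for each keypoint of image 0
    best1 = inner.argmax(dim=1)   # preferred keypoint in image 0 for each keypoint of image 1
    idx0 = torch.arange(inner.shape[1], device=scores.device).unsqueeze(0)
    mutual = best1.gather(1, best0) == idx0    # keep only pairs that pick each other
    matching_scores = inner.max(dim=2).values  # confidence of each tentative match
    valid = mutual & (matching_scores > threshold)
    matches = torch.where(valid, best0, torch.full_like(best0, -1))
    return matches, torch.where(valid, matching_scores, torch.zeros_like(matching_scores))


# Tiny smoke test with two keypoints per image.
scores = torch.log_softmax(torch.randn(1, 3, 3), dim=-1)
print(mutual_matches(scores, threshold=-10.0))
```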
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from dataclasses import dataclass
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Callable, Optional, Union

 import numpy as np
 import torch
@@ -196,17 +196,17 @@ class LightGlueKeypointMatchingOutput(ModelOutput):
     keypoints: Optional[torch.FloatTensor] = None
     prune: Optional[torch.IntTensor] = None
     mask: Optional[torch.FloatTensor] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[tuple[torch.FloatTensor]] = None
+    attentions: Optional[tuple[torch.FloatTensor]] = None


 class LightGlueImageProcessor(SuperGlueImageProcessor):
     def post_process_keypoint_matching(
         self,
         outputs: LightGlueKeypointMatchingOutput,
-        target_sizes: Union[TensorType, List[Tuple]],
+        target_sizes: Union[TensorType, list[tuple]],
         threshold: float = 0.0,
-    ) -> List[Dict[str, torch.Tensor]]:
+    ) -> list[dict[str, torch.Tensor]]:
         return super().post_process_keypoint_matching(outputs, target_sizes, threshold)

     def plot_keypoint_matching(self, images: ImageInput, keypoint_matching_output: LightGlueKeypointMatchingOutput):
@@ -263,7 +263,7 @@ class LightGluePositionalEncoder(nn.Module):

     def forward(
         self, keypoints: torch.Tensor, output_hidden_states: Optional[bool] = False
-    ) -> Union[Tuple[torch.Tensor], Tuple[torch.Tensor, torch.Tensor]]:
+    ) -> Union[tuple[torch.Tensor], tuple[torch.Tensor, torch.Tensor]]:
         projected_keypoints = self.projector(keypoints)
         embeddings = projected_keypoints.repeat_interleave(2, dim=-1)
         cosines = torch.cos(embeddings)
@@ -277,12 +277,12 @@ class LightGlueAttention(LlamaAttention):
     def forward(
         self,
         hidden_states: torch.Tensor,
-        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
         attention_mask: Optional[torch.Tensor] = None,
         encoder_hidden_states: Optional[torch.Tensor] = None,
         encoder_attention_mask: Optional[torch.Tensor] = None,
         **kwargs: Unpack[FlashAttentionKwargs],
-    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
         hidden_shape = (*input_shape, -1, self.head_dim)

@@ -348,7 +348,7 @@ class LightGlueTransformerLayer(nn.Module):
         attention_mask: torch.Tensor,
         output_hidden_states: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
-    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], Optional[Tuple[torch.Tensor]]]:
+    ) -> tuple[torch.Tensor, Optional[tuple[torch.Tensor]], Optional[tuple[torch.Tensor]]]:
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None

@@ -509,7 +509,7 @@ class LightGluePreTrainedModel(PreTrainedModel):
             module.weight.data.fill_(1.0)


-def get_matches_from_scores(scores: torch.Tensor, threshold: float) -> Tuple[torch.Tensor, torch.Tensor]:
+def get_matches_from_scores(scores: torch.Tensor, threshold: float) -> tuple[torch.Tensor, torch.Tensor]:
     """obtain matches from a score matrix [Bx M+1 x N+1]"""
     batch_size, _, _ = scores.shape
     # For each keypoint, get the best match
@@ -622,7 +622,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):

     def _keypoint_processing(
         self, descriptors: torch.Tensor, keypoints: torch.Tensor, output_hidden_states: Optional[bool] = False
-    ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    ) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
         descriptors = descriptors.detach().contiguous()
         projected_descriptors = self.input_projection(descriptors)
         keypoint_encoding_output = self.positional_encoder(keypoints, output_hidden_states=output_hidden_states)
@@ -733,7 +733,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
         matches: torch.Tensor,
         matching_scores: torch.Tensor,
         num_keypoints: torch.Tensor,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         # (batch_size, num_keypoints) -> (batch_size // 2, 2, num_keypoints) -> 2 * (batch_size // 2, num_keypoints) to
         # have tensors from
         batch_size, _ = indices.shape
@@ -773,7 +773,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
         mask: torch.Tensor = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Tuple, Tuple]:
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, tuple, tuple]:
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None

@@ -949,7 +949,7 @@ class LightGlueForKeypointMatching(LightGluePreTrainedModel):
         labels: Optional[torch.LongTensor] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
-    ) -> Union[Tuple, LightGlueKeypointMatchingOutput]:
+    ) -> Union[tuple, LightGlueKeypointMatchingOutput]:
         loss = None
         if labels is not None:
             raise ValueError("LightGlue is not trainable, no labels should be provided.")
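The last file in the diff is the modular definition for LightGlue, which is why the same annotation changes appear a second time: the standalone modeling and image-processing files are generated from it, and rerunning the code generation (presumably what the `make fix-copies` in the commit message refreshed) propagates the edits. The pattern it relies on is plain subclass delegation, roughly like this sketch with illustrative stand-in classes (not the transformers classes):

```python
class SuperGlueStyleProcessor:
    def post_process_keypoint_matching(self, outputs, target_sizes, threshold=0.0):
        # The parent class owns the real conversion from raw outputs to per-pair match dicts.
        return [{"target_sizes": target_sizes, "threshold": threshold}]


class LightGlueStyleProcessor(SuperGlueStyleProcessor):
    # The modular subclass only restates the method to adjust its signature and docs,
    # then hands the actual work back to the parent.
    def post_process_keypoint_matching(self, outputs, target_sizes, threshold=0.0):
        return super().post_process_keypoint_matching(outputs, target_sizes, threshold)


print(LightGlueStyleProcessor().post_process_keypoint_matching(None, [(480, 640)]))
```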