mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
more fixes
This commit is contained in:
parent
104296a3dc
commit
019210c9c7
@ -50,10 +50,5 @@ The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
|
||||
- forward
|
||||
|
||||
|
||||
## AIMv2ForImageClassification
|
||||
|
||||
[[autodoc]] AIMv2ForImageClassification
|
||||
- forward
|
||||
|
||||
</pt>
|
||||
<tf>
|
||||
|
@ -6752,15 +6752,12 @@ if TYPE_CHECKING:
|
||||
model_addition_debugger_context,
|
||||
)
|
||||
from .modeling_rope_utils import ROPE_INIT_FUNCTIONS
|
||||
|
||||
from .modeling_utils import AttentionInterface, PreTrainedModel
|
||||
|
||||
from .models.aimv2 import (
|
||||
AIMv2Model,
|
||||
AIMv2TextModel,
|
||||
AIMv2VisionModel,
|
||||
)
|
||||
|
||||
from .models.albert import (
|
||||
AlbertForMaskedLM,
|
||||
AlbertForMultipleChoice,
|
||||
|
@ -79,6 +79,8 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = {
|
||||
r"text_encoder.trunk.blocks.(\d+).norm_2": r"text_model.encoder.layers.\1.rms_norm2",
|
||||
r"text_encoder.trunk.post_trunk_norm": r"text_model.rms_norm",
|
||||
r"text_projector": r"text_projection",
|
||||
r"log_logit_scale": r"logit_scale",
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -169,7 +171,7 @@ def write_model(
|
||||
|
||||
state_dict = {}
|
||||
# For `apple/aimv2-large-patch14-native` we don't have position_embedding in state_dict
|
||||
strict_loading = True
|
||||
strict_loading = False
|
||||
result = convert_old_keys_to_new_keys(original_state_dict, key_mapping)
|
||||
all_keys = list(original_state_dict.keys())
|
||||
|
||||
|
@ -83,8 +83,7 @@ class AIMv2VisionConfig(SiglipVisionConfig):
|
||||
The standard deviation used for initializing all weight matrices.
|
||||
use_head (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to use the attention pooling head.
|
||||
"""
|
||||
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
hidden_size: int = 1024,
|
||||
@ -174,8 +173,7 @@ class AIMv2TextConfig(SiglipTextConfig):
|
||||
just in case (e.g., 512 or 1024 or 2048).
|
||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||
The standard deviation used for initializing all weight matrices.
|
||||
"""
|
||||
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
vocab_size: int = 49408,
|
||||
@ -268,7 +266,6 @@ class AIMv2Config(SiglipConfig):
|
||||
|
||||
>>> config = AIMv2Config.from_text_vision_configs(config_text, config_vision)
|
||||
```"""
|
||||
|
||||
def __init__(
|
||||
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs
|
||||
):
|
||||
|
Loading…
Reference in New Issue
Block a user