diff --git a/src/transformers/models/siglip/modeling_siglip.py b/src/transformers/models/siglip/modeling_siglip.py
index 252315d4186..d6966c31f66 100644
--- a/src/transformers/models/siglip/modeling_siglip.py
+++ b/src/transformers/models/siglip/modeling_siglip.py
@@ -370,7 +370,7 @@ def eager_attention_forward(
 class SiglipAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

-    def __init__(self, config: Union[SiglipVisionConfig, SiglipTextConfig]):
+    def __init__(self, config):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
diff --git a/src/transformers/models/siglip2/modeling_siglip2.py b/src/transformers/models/siglip2/modeling_siglip2.py
index 8614384241b..a198cbc347f 100644
--- a/src/transformers/models/siglip2/modeling_siglip2.py
+++ b/src/transformers/models/siglip2/modeling_siglip2.py
@@ -264,7 +264,7 @@ def eager_attention_forward(
 class Siglip2Attention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

-    def __init__(self, config: Union[Siglip2VisionConfig, Siglip2TextConfig]):
+    def __init__(self, config):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 9ee6a93dbbc..9f56c79956e 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -739,7 +739,6 @@ class ModelTesterMixin:
         model = model_class(config)
         model.to(torch_device)
         model.eval()
-        print(model_class)
         with torch.no_grad():
             first = model(**self._prepare_for_class(inputs_dict, model_class))[0]
             second = model(**self._prepare_for_class(inputs_dict, model_class))[0]