diff --git a/src/transformers/models/dinov2/modeling_dinov2.py b/src/transformers/models/dinov2/modeling_dinov2.py
index 30bf56d3524..69911e40bc3 100644
--- a/src/transformers/models/dinov2/modeling_dinov2.py
+++ b/src/transformers/models/dinov2/modeling_dinov2.py
@@ -491,7 +491,7 @@ class Dinov2PreTrainedModel(PreTrainedModel):
     base_model_prefix = "dinov2"
     main_input_name = "pixel_values"
     supports_gradient_checkpointing = True
-    _no_split_modules = ["Dinov2SwiGLUFFN"]
+    _no_split_modules = ["Dinov2Layer"]
     _supports_sdpa = True
     _supports_flash_attn_2 = True
     _supports_flex_attn = True
diff --git a/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py b/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py
index 82d8169c38d..db2d2b01d13 100644
--- a/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py
+++ b/src/transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py
@@ -509,7 +509,7 @@ class Dinov2WithRegistersPreTrainedModel(PreTrainedModel):
     base_model_prefix = "dinov2_with_registers"
     main_input_name = "pixel_values"
     supports_gradient_checkpointing = True
-    _no_split_modules = ["Dinov2WithRegistersSwiGLUFFN"]
+    _no_split_modules = ["Dinov2WithRegistersLayer"]
     _supports_sdpa = True
     _supports_flash_attn_2 = True
     _supports_flex_attn = True