Enable device map (#30870)

* Added _no_split_modules
* Added VideoLlavaVisionAttention to _no_split_modules
2025-07-31 02:02:21 +06:00 · 2024-05-17 17:20:24 +05:30 · 2024-05-17 17:20:24 +05:30 · 3802e786ef
commit 3802e786ef
parent 57c965a8f1
1 changed file with 1 addition and 0 deletions
--- a/src/transformers/models/video_llava/modeling_video_llava.py
+++ b/src/transformers/models/video_llava/modeling_video_llava.py
@@ -124,6 +124,7 @@ class VideoLlavaPreTrainedModel(PreTrainedModel):
    supports_gradient_checkpointing = True
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
+    _no_split_modules = ["VideoLlavaVisionAttention"]

    def _init_weights(self, module):
        # important: this ported version of VideoLlava isn't meant for training from scratch - only