From e5a48785d9757278eaf201b492a8eaba9724a84a Mon Sep 17 00:00:00 2001 From: "Huang, Guangtai" Date: Thu, 15 May 2025 00:12:39 -0700 Subject: [PATCH] [Qwen3] Qwen3 MoE add tp plan for expert mlps (#38135) fix tp plan --- src/transformers/models/qwen3_moe/configuration_qwen3_moe.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py b/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py index 8b3219c9c39..082b8ffb8cb 100644 --- a/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py +++ b/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py @@ -155,6 +155,9 @@ class Qwen3MoeConfig(PretrainedConfig): "layers.*.self_attn.k_proj": "colwise", "layers.*.self_attn.v_proj": "colwise", "layers.*.self_attn.o_proj": "rowwise", + "layers.*.mlp.experts.*.gate_proj": "colwise", + "layers.*.mlp.experts.*.up_proj": "colwise", + "layers.*.mlp.experts.*.down_proj": "rowwise", "layers.*.mlp.gate_proj": "colwise", "layers.*.mlp.up_proj": "colwise", "layers.*.mlp.down_proj": "rowwise",