[Qwen3] Qwen3 MoE add tp plan for expert mlps (#38135)

fix tp plan
This commit is contained in:
Huang, Guangtai 2025-05-15 00:12:39 -07:00 committed by GitHub
parent 4005e30c80
commit e5a48785d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -155,6 +155,9 @@ class Qwen3MoeConfig(PretrainedConfig):
 "layers.*.self_attn.k_proj": "colwise",
 "layers.*.self_attn.v_proj": "colwise",
 "layers.*.self_attn.o_proj": "rowwise",
+"layers.*.mlp.experts.*.gate_proj": "colwise",
+"layers.*.mlp.experts.*.up_proj": "colwise",
+"layers.*.mlp.experts.*.down_proj": "rowwise",
 "layers.*.mlp.gate_proj": "colwise",
 "layers.*.mlp.up_proj": "colwise",
 "layers.*.mlp.down_proj": "rowwise",