fix(qwen3_moe): pass kwargs to self_attn (#38691)

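Forward `**kwargs` from the decoder layer to `self_attn` so that extra attention arguments supplied by the caller reach the attention call. This is needed to avoid `.item()` calls in `_flash_attention_forward`.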
This commit is contained in:
L 2025-06-11 10:26:08 -07:00 committed by GitHub
parent 9f563ada70
commit b84ebb7f3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2 additions and 0 deletions

View File

@@ -355,6 +355,7 @@ class Qwen3MoeDecoderLayer(nn.Module):
             use_cache=use_cache,
             cache_position=cache_position,
             position_embeddings=position_embeddings,
+            **kwargs,
         )
         hidden_states = residual + hidden_states

View File

@@ -189,6 +189,7 @@ class Qwen3MoeDecoderLayer(Qwen2MoeDecoderLayer, nn.Module):
             use_cache=use_cache,
             cache_position=cache_position,
             position_embeddings=position_embeddings,
+            **kwargs,
         )
         hidden_states = residual + hidden_states