From b591d925bed9235d158a9ff569c5ac1f4c619157 Mon Sep 17 00:00:00 2001 From: Cyril Vallez Date: Tue, 20 May 2025 16:00:46 +0200 Subject: [PATCH] Fix Llama4 (#38222) Update modeling_llama4.py: in Llama4TextMoe.forward, flatten hidden_states with reshape instead of view (reshape also accepts inputs that view cannot flatten without a copy). --- src/transformers/models/llama4/modeling_llama4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/llama4/modeling_llama4.py b/src/transformers/models/llama4/modeling_llama4.py index 5bf5f1488c5..f8274bf1822 100644 --- a/src/transformers/models/llama4/modeling_llama4.py +++ b/src/transformers/models/llama4/modeling_llama4.py @@ -144,7 +144,7 @@ class Llama4TextMoe(nn.Module): def forward(self, hidden_states): batch, seq_len, hidden_dim = hidden_states.shape - hidden_states = hidden_states.view(-1, self.hidden_dim) + hidden_states = hidden_states.reshape(-1, self.hidden_dim) router_logits = self.router(hidden_states) tokens_per_expert = batch * seq_len