From 214db9e660f99ee2d7ed63c5784b829bc59caf0d Mon Sep 17 00:00:00 2001
From: chengchengpei <5881383+chengchengpei@users.noreply.github.com>
Date: Mon, 23 Sep 2024 03:54:58 -0700
Subject: [PATCH] add back self.max_position_embeddings =
 config.max_position_embeddings (#33550)

* add back self.max_position_embeddings = config.max_position_embeddings

* fix-copies
---
 src/transformers/models/qwen2/modeling_qwen2.py         | 1 +
 src/transformers/models/qwen2_moe/modeling_qwen2_moe.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py
index aafecb95b6a..1e79115d347 100644
--- a/src/transformers/models/qwen2/modeling_qwen2.py
+++ b/src/transformers/models/qwen2/modeling_qwen2.py
@@ -310,6 +310,7 @@ class Qwen2Attention(nn.Module):
         self.head_dim = self.hidden_size // self.num_heads
         self.num_key_value_heads = config.num_key_value_heads
         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.max_position_embeddings = config.max_position_embeddings
         self.rope_theta = config.rope_theta
         self.is_causal = True
         self.attention_dropout = config.attention_dropout
diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
index bc06b406bf4..c9ee7b5f57a 100644
--- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
+++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
@@ -388,6 +388,7 @@ class Qwen2MoeAttention(nn.Module):
         self.head_dim = self.hidden_size // self.num_heads
         self.num_key_value_heads = config.num_key_value_heads
         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.max_position_embeddings = config.max_position_embeddings
         self.rope_theta = config.rope_theta
         self.is_causal = True
         self.attention_dropout = config.attention_dropout