From 0fc683d1cd949b6c802499b77edd32206dbe86c6 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Tue, 8 Apr 2025 20:58:22 +0900 Subject: [PATCH] convert float for yarn related arguments in rope_scaling (#37139) * convert float for yarn related arguments in rope_scaling * sort keys alphabetically --------- Co-authored-by: ryan.agile --- .../models/deepseek_v3/configuration_deepseek_v3.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py b/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py index 8f04f9a8e9d..82b8701cb57 100644 --- a/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py +++ b/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py @@ -233,6 +233,12 @@ class DeepseekV3Config(PretrainedConfig): # BC: if there is a 'type' field, copy it it to 'rope_type'. if self.rope_scaling is not None and "type" in self.rope_scaling: self.rope_scaling["rope_type"] = self.rope_scaling["type"] + + if self.rope_scaling is not None: + for key in ["beta_fast", "beta_slow", "factor"]: + if key in self.rope_scaling: + self.rope_scaling[key] = float(self.rope_scaling[key]) + rope_config_validation(self) super().__init__(