mirror of https://github.com/huggingface/transformers.git
Fix num_hidden_layers in initialization of new model in Mamba (#30403)
Originally, the initialization used config.num_layers instead of config.num_hidden_layers. This fixes that.
commit 1834916481
parent 1c2bb3ac54
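For context, the hunk below touches a scaled weight initialization in MambaPreTrainedModel: a parameter is re-initialized with kaiming_uniform_ and then divided once by the square root of the layer count under no_grad, rather than multiplied in place, so that repeated initialization calls stay idempotent. The standalone sketch below only illustrates that pattern; the ScaledInitLinear module and its arguments are hypothetical, with num_hidden_layers standing in for the config attribute this commit switches to.

import math

import torch
import torch.nn as nn


# Standalone sketch of the init scheme in the hunk below (hypothetical module, not the
# transformers code): re-initialize the parameter from scratch, then divide once by
# sqrt(num_hidden_layers). A bare `p *= scale` would compound if init ran more than once.
class ScaledInitLinear(nn.Module):
    def __init__(self, hidden_size: int, num_hidden_layers: int):
        super().__init__()
        self.out_proj = nn.Linear(hidden_size, hidden_size, bias=False)
        p = self.out_proj.weight
        nn.init.kaiming_uniform_(p, a=math.sqrt(5))
        with torch.no_grad():
            p /= math.sqrt(num_hidden_layers)  # the pre-fix line read config.num_layers here


scaled = ScaledInitLinear(hidden_size=64, num_hidden_layers=12)
plain = nn.Linear(64, 64, bias=False)
# The rescaled projection has a noticeably smaller spread than a default-initialized layer.
print(scaled.out_proj.weight.std().item(), plain.weight.std().item())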
@@ -399,7 +399,7 @@ class MambaPreTrainedModel(PreTrainedModel):
                         # Having just p *= scale would repeatedly scale it down
                         nn.init.kaiming_uniform_(p, a=math.sqrt(5))
                         with torch.no_grad():
-                            p /= math.sqrt(self.config.num_layers)
+                            p /= math.sqrt(self.config.num_hidden_layers)


 @dataclass
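A quick way to exercise the patched path (a minimal sketch, assuming MambaConfig accepts the rescale_prenorm_residual flag that gates this init scheme; the sizes are arbitrary):

from transformers import MambaConfig, MambaModel

# Tiny config so weight init runs quickly. Assuming rescale_prenorm_residual routes
# initialization through the changed line, constructing the model now divides the
# relevant weights by sqrt(config.num_hidden_layers) instead of reading config.num_layers.
config = MambaConfig(hidden_size=64, num_hidden_layers=4, rescale_prenorm_residual=True)
model = MambaModel(config)  # weight init runs during construction
print(sum(p.numel() for p in model.parameters()))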