Update modeling_gpt_neox.py (#17575)
I'm guessing that the intention was for the `_no_split_modules` class attribute of `GPTNeoXPreTrainedModel` to be set to `["GPTNeoXLayer"]`, akin to how it's set to `["GPTJBlock"]` for `GPTJPreTrainedModel`. If this is incorrect, please feel free to just close the PR. Thanks!
parent a1344dbfb9
commit 5483388631
@@ -53,6 +53,7 @@ class GPTNeoXPreTrainedModel(PreTrainedModel):
     config_class = GPTNeoXConfig
     base_model_prefix = "gpt_neox"
     supports_gradient_checkpointing = True
+    _no_split_modules = ["GPTNeoXLayer"]

     def _init_weights(self, module):
         """Initialize the weights"""
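For context, here is a minimal sketch (not part of this commit) of how `_no_split_modules` gets used: when a model is loaded with `device_map="auto"`, the Accelerate-based placement logic consults this attribute to keep each listed module, here a whole `GPTNeoXLayer`, on a single device instead of splitting its weights across devices. The checkpoint name below is only an illustrative example.

    from transformers import GPTNeoXForCausalLM

    # Sketch only: "EleutherAI/gpt-neox-20b" is an example checkpoint.
    # device_map="auto" shards the model across available devices; the
    # _no_split_modules = ["GPTNeoXLayer"] attribute added in this commit
    # tells the placement logic never to split one GPTNeoXLayer's weights
    # between two devices (which would break the layer's forward pass).
    model = GPTNeoXForCausalLM.from_pretrained(
        "EleutherAI/gpt-neox-20b",
        device_map="auto",
    )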