GPTNeo: handle padded wte (#11079)

* GPTNeo: handle padded wte
* Switch to config.vocab_size
* apply review suggestion

Co-authored-by: Suraj Patil <surajp815@gmail.com>
2025-07-31 02:02:21 +06:00 · 2021-04-07 06:05:20 -06:00 · 2021-04-07 06:05:20 -06:00 · 247bed3857
commit 247bed3857
parent 083ad7d46c
1 changed files with 4 additions and 0 deletions
--- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -112,6 +112,10 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
        if name[-1] == "w" and name[-2] in ["out_proj", "k_proj", "q_proj", "v_proj", "c_proj", "c_fc"]:
            array = array.transpose()

+        if name == ["wte"]:
+            # if vocab is padded, then trim off the padding embeddings
+            array = array[: config.vocab_size]
+
        try:
            assert (
                pointer.shape == array.shape