clean up pr

2025-07-31 02:02:21 +06:00 · 2018-11-13 16:31:20 +01:00 · 2018-11-13 16:31:20 +01:00 · 7ba83730c4
commit 7ba83730c4
parent fa0c5a2ea1
2 changed files with 15 additions and 5 deletions
--- a/convert_tf_checkpoint_to_pytorch.py
+++ b/convert_tf_checkpoint_to_pytorch.py
@ -68,11 +68,17 @@ def convert():
        arrays.append(array)

    for name, array in zip(names, arrays):
-        name = name[5:]  # skip "bert/"
+        if not name.startswith("bert"):
+            print("Skipping {}".format(name))
+            continue
+        else:
+            name = name.replace("bert/", "")  # skip "bert/"
        print("Loading {}".format(name))
        name = name.split('/')
-        if name[0] in ['redictions', 'eq_relationship']:
-            print("Skipping")
+        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
+        # which are not required for using a pretrained model
+        if name[0] in ['redictions', 'eq_relationship'] or name[-1] == "adam_v" or  name[-1] == "adam_m":
+            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
--- a/modeling.py
+++ b/modeling.py
@ -26,6 +26,10 @@ import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss

+
+ACT2FN = {"gelu": gelu, "relu": torch.nn.ReLU, "swish": swish}
+
+
 def gelu(x):
    """Implementation of the gelu activation function.
        For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
@ -241,8 +245,8 @@ class BERTIntermediate(nn.Module):
    def __init__(self, config):
        super(BERTIntermediate, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
-        act2fn = {"gelu": gelu, "relu": torch.nn.ReLU, "swish": swish}
-        self.intermediate_act_fn = act2fn[config.hidden_act] if isinstance(config.hidden_act, str) else config.hidden_act
+        self.intermediate_act_fn = ACT2FN[config.hidden_act] \
+            if isinstance(config.hidden_act, str) else config.hidden_act

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)