Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
fix tests - bump up version
This commit is contained in:
parent ffd623823d · commit 009ee86a19
@@ -1,4 +1,4 @@
-__version__ = "0.5.1"
+__version__ = "0.6.0"
 from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
@@ -64,20 +64,24 @@ def load_tf_weights_in_gpt2(model, gpt2_checkpoint_path):
         print("Loading TF weight {} with shape {}".format(name, shape))
         array = tf.train.load_variable(tf_path, name)
         names.append(name)
-        arrays.append(array)
+        arrays.append(array.squeeze())

     for name, array in zip(names, arrays):
         name = name[6:]  # skip "model/"
         name = name.split('/')
         pointer = model
         for m_name in name:
-            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
-                l = re.split(r'_(\d+)', m_name)
+            if re.fullmatch(r'[A-Za-z]+\d+', m_name):
+                l = re.split(r'(\d+)', m_name)
             else:
                 l = [m_name]
             if l[0] == 'w' or l[0] == 'g':
                 pointer = getattr(pointer, 'weight')
             elif l[0] == 'b':
                 pointer = getattr(pointer, 'bias')
             elif l[0] == 'wpe' or l[0] == 'wte':
                 pointer = getattr(pointer, l[0])
                 pointer = getattr(pointer, 'weight')
             else:
                 pointer = getattr(pointer, l[0])
             if len(l) >= 2:
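Review note: the regex change above reflects that GPT-2 TF checkpoint scopes are named like h0, h1, ... with no underscore before the layer index, so the old pattern never matched them. A minimal illustration (the scope name "h0" is an assumed example, not taken from this diff):

import re

m_name = "h0"                                   # assumed GPT-2 scope component
print(re.fullmatch(r'[A-Za-z]+_\d+', m_name))   # None: old pattern expects an underscore
print(re.fullmatch(r'[A-Za-z]+\d+', m_name))    # matches with the new pattern
print(re.split(r'(\d+)', m_name))               # ['h', '0', ''] -- keeps the layer index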
@@ -107,7 +111,7 @@ class GPT2Config(object):

     def __init__(
         self,
-        vocab_size_or_config_json_file=40478,
+        vocab_size_or_config_json_file=50257,
         n_positions=1024,
         n_ctx=1024,
         n_embd=768,
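Review note: the new default corresponds to GPT-2's 50,257-token byte-level BPE vocabulary; 40,478 was the GPT (openai-gpt) vocabulary size. A minimal sketch of the changed default, assuming GPT2Config is importable from the package at this version and that the value is exposed as a vocab_size attribute:

from pytorch_pretrained_bert import GPT2Config  # import path assumed

config = GPT2Config()      # defaults as set in this commit
print(config.vocab_size)   # expected: 50257 (attribute name assumed)
print(config.n_ctx)        # expected: 1024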
@@ -273,10 +277,10 @@ class Block(nn.Module):
         self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
         self.mlp = MLP(4 * nx, config)

-    def forward(self, x, past):
+    def forward(self, x, past=None):
         a, present = self.attn(self.ln_1(x), past=past)
         x = x + a
-        m = self.mlp(self.ln_2(c))
+        m = self.mlp(self.ln_2(x))
         x = x + m
         return x, present

@@ -522,8 +526,12 @@ class GPT2Model(GPT2PreTrainedModel):

         self.apply(self.init_weights)

-    def forward(self, input_ids, position_ids=None, token_type_ids=None, past=None):
-        past_length = 0 if past is None else past[0][0].size(-2)
+    def forward(self, input_ids, position_ids=None, token_type_ids=None, pasts=None):
+        if pasts is None:
+            past_length = 0
+            pasts = [None] * len(self.h)
+        else:
+            past_length = pasts[0][0].size(-2)
         if position_ids is None:
             position_ids = torch.arange(past_length, input_ids.size(-1) + past_length, dtype=torch.long, device=input_ids.device)
             position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
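Review note: the pasts argument lets a caller feed back the per-layer presents returned by an earlier call, so only the new tokens need to be encoded. A minimal sketch of that incremental-decoding pattern, assuming the import paths and token ids shown here (they are illustrative, not taken from the repository's examples):

import torch
from pytorch_pretrained_bert import GPT2Config, GPT2Model  # import paths assumed

config = GPT2Config()
model = GPT2Model(config)   # randomly initialized; enough to show the call pattern
model.eval()

prompt_ids = torch.tensor([[10, 20, 30]])   # assumed token ids for a prompt
with torch.no_grad():
    hidden, presents = model(prompt_ids)                # first pass: pasts defaults to None
    next_id = torch.tensor([[40]])                      # assumed next token id
    hidden, presents = model(next_id, pasts=presents)   # second pass reuses cached keys/values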
@@ -541,8 +549,8 @@ class GPT2Model(GPT2PreTrainedModel):
             token_type_embeds = 0
         hidden_states = inputs_embeds + position_embeds + token_type_embeds
         presents = []
-        for block in self.h:
-            hidden_states, present = block(hidden_states)
+        for block, past in zip(self.h, pasts):
+            hidden_states, present = block(hidden_states, past)
             presents.append(present)
         hidden_states = self.ln_f(hidden_states)
         output_shape = input_shape + (hidden_states.size(-1),)
@@ -599,8 +607,8 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
         """
         self.lm_head.set_embeddings_weights(self.transformer.wte.weight)

-    def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None):
-        hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
+    def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, pasts=None):
+        hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, pasts)
         lm_logits = self.lm_head(hidden_states)
         if lm_labels is not None:
             loss_fct = CrossEntropyLoss(ignore_index=-1)
@@ -665,8 +673,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         """
         self.lm_head.set_embeddings_weights(self.transformer.wte.weight)

-    def forward(self, input_ids, mc_token_ids, lm_labels=None, mc_labels=None, token_type_ids=None, position_ids=None, past=None):
-        hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
+    def forward(self, input_ids, mc_token_ids, lm_labels=None, mc_labels=None, token_type_ids=None, position_ids=None, pasts=None):
+        hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, pasts)
         lm_logits = self.lm_head(hidden_states)
         mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids)
         losses = []
@@ -56,7 +56,7 @@ def load_tf_weights_in_openai_gpt(model, openai_checkpoint_folder_path):
     init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
     init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)]

-    # Thsi as used when we had a single embedding matrix for positions and tokens
+    # This was used when we had a single embedding matrix for positions and tokens
     # init_params[0] = np.concatenate([init_params[1], init_params[0]], 0)
     # del init_params[1]
     init_params = [arr.squeeze() for arr in init_params]
setup.py
@@ -38,7 +38,7 @@ from setuptools import find_packages, setup

 setup(
     name="pytorch_pretrained_bert",
-    version="0.5.1",
+    version="0.6.0",
     author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors, Open AI team Authors",
     author_email="thomas@huggingface.co",
     description="PyTorch version of Google AI BERT model with script to load Google pre-trained models",
@@ -38,7 +38,6 @@ class GPT2ModelTest(unittest.TestCase):
                      use_token_type_ids=True,
                      use_labels=True,
                      vocab_size=99,
-                     n_special=1,
                      n_positions=33,
                      n_embd=32,
                      n_layer=5,
@@ -56,7 +55,6 @@ class GPT2ModelTest(unittest.TestCase):
             self.use_token_type_ids = use_token_type_ids
             self.use_labels = use_labels
             self.vocab_size = vocab_size
-            self.n_special = n_special
             self.n_positions = n_positions
             self.n_embd = n_embd
             self.n_layer = n_layer
@@ -76,7 +74,7 @@ class GPT2ModelTest(unittest.TestCase):

             token_type_ids = None
             if self.use_token_type_ids:
-                total_voc = self.vocab_size + self.n_special
+                total_voc = self.vocab_size
                 token_type_ids = GPT2ModelTest.ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)

             mc_labels = None
@@ -90,7 +88,6 @@ class GPT2ModelTest(unittest.TestCase):
             config = GPT2Config(
                 vocab_size_or_config_json_file=self.vocab_size,
                 n_positions=self.n_positions,
-                n_special=self.n_special,
                 n_embd=self.n_embd,
                 n_layer=self.n_layer,
                 n_head=self.n_head,
@@ -130,7 +127,7 @@ class GPT2ModelTest(unittest.TestCase):
             return outputs

         def check_gpt2_lm_head_output(self, result):
-            total_voc = self.n_special + self.vocab_size
+            total_voc = self.vocab_size
             self.parent.assertListEqual(
                 list(result["lm_logits"].size()),
                 [self.batch_size, self.n_choices, self.seq_length, total_voc])
@@ -157,7 +154,7 @@ class GPT2ModelTest(unittest.TestCase):
             return outputs

         def check_gpt2_double_heads_output(self, result):
-            total_voc = self.n_special + self.vocab_size
+            total_voc = self.vocab_size
             self.parent.assertListEqual(
                 list(result["lm_logits"].size()),
                 [self.batch_size, self.n_choices, self.seq_length, total_voc])
@@ -1,56 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The Google AI Language Team Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import os
-import unittest
-import json
-
-from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer
-
-
-class GPT2TokenizationTest(unittest.TestCase):
-
-    def test_full_tokenizer(self):
-        """ Adapted from Sennrich et al. 2015 and https://github.com/rsennrich/subword-nmt """
-        vocab = ["l", "o", "w", "e", "r", "s", "t", "i", "d", "n",
-                 "w</w>", "r</w>", "t</w>",
-                 "lo", "low", "er</w>",
-                 "low</w>", "lowest</w>", "newer</w>", "wider</w>"]
-        vocab_tokens = dict(zip(vocab, range(len(vocab))))
-        merges = ["#version: 0.2", "l o", "lo w", "e r</w>", ""]
-        with open("/tmp/openai_tokenizer_vocab_test.json", "w") as fp:
-            json.dump(vocab_tokens, fp)
-            vocab_file = fp.name
-        with open("/tmp/openai_tokenizer_merges_test.txt", "w") as fp:
-            fp.write("\n".join(merges))
-            merges_file = fp.name
-
-        tokenizer = GPT2Tokenizer(vocab_file, merges_file)
-        os.remove(vocab_file)
-        os.remove(merges_file)
-
-        text = "lower"
-        bpe_tokens = ["low", "er</w>"]
-        tokens = tokenizer.tokenize(text)
-        self.assertListEqual(tokens, bpe_tokens)
-
-        input_tokens = tokens
-        input_bpe_tokens = [14, 15, 20]
-        self.assertListEqual(
-            tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)
-
-if __name__ == '__main__':
-    unittest.main()