Merge pull request #2291 from aaugustin/fix-flake8-F841

Fix F841 flake8 warning
Thomas Wolf, 2019-12-25 22:37:42 +01:00, committed by GitHub
commit 0412f3d929
17 changed files with 14 additions and 41 deletions
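
F841 is flake8's warning for a local variable that is assigned to but never used. Most hunks below simply delete the dead binding; where the right-hand side matters for its side effect (for example, calling a Keras model once so that it builds its weights), the call is kept and only the assignment is dropped. A minimal sketch of both fixes, with hypothetical names:

    # Before: the assignment trips F841 because 'outputs' is never read.
    def build_before(model, dummy_inputs):
        outputs = model(dummy_inputs, training=False)  # F841: assigned to but never used
        return True

    # After: keep the call for its side effect (building the model), drop the binding.
    def build_after(model, dummy_inputs):
        model(dummy_inputs, training=False)  # model still gets built; no name bound, no warning
        return True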

========================================
@@ -44,13 +44,10 @@ from transformers import (
     AdamW,
     OpenAIGPTDoubleHeadsModel,
     OpenAIGPTTokenizer,
-    cached_path,
     get_linear_schedule_with_warmup,
 )

-ROCSTORIES_URL = "https://s3.amazonaws.com/datasets.huggingface.co/ROCStories.tar.gz"
-
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
 )
@@ -182,9 +179,6 @@ def main():
     model.to(device)

     # Load and encode the datasets
-    if not args.train_dataset and not args.eval_dataset:
-        roc_stories = cached_path(ROCSTORIES_URL)
-
     def tokenize_and_encode(obj):
         """ Tokenize and encode a nested object """
         if isinstance(obj, str):

========================================
@@ -28,7 +28,7 @@ import time
 import torch

-from transformers import TransfoXLCorpus, TransfoXLLMHeadModel, TransfoXLTokenizer
+from transformers import TransfoXLCorpus, TransfoXLLMHeadModel

 logging.basicConfig(
@@ -73,9 +73,7 @@ def main():
     # The pre-processing involve computing word frequencies to prepare the Adaptive input and SoftMax
     # and tokenizing the dataset
     # The pre-processed corpus is a convertion (using the conversion script )
-    tokenizer = TransfoXLTokenizer.from_pretrained(args.model_name)
     corpus = TransfoXLCorpus.from_pretrained(args.model_name)
-    ntokens = len(corpus.vocab)

     va_iter = corpus.get_iterator("valid", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
     te_iter = corpus.get_iterator("test", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)

========================================
@@ -141,7 +141,7 @@ def train(args, train_dataset, model, tokenizer):
     global_step = 0
     tr_loss, logging_loss = 0.0, 0.0
-    best_dev_acc, best_dev_loss = 0.0, 99999999999.0
+    best_dev_acc = 0.0
     best_steps = 0
     model.zero_grad()
     train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
@@ -193,7 +193,6 @@ def train(args, train_dataset, model, tokenizer):
                             tb_writer.add_scalar("eval_{}".format(key), value, global_step)
                         if results["eval_acc"] > best_dev_acc:
                             best_dev_acc = results["eval_acc"]
-                            best_dev_loss = results["eval_loss"]
                             best_steps = global_step
                             if args.do_test:
                                 results_test = evaluate(args, model, tokenizer, test=True)

========================================
@@ -446,8 +446,6 @@ class MultiHeadedAttention(nn.Module):
         batch_size = key.size(0)
         dim_per_head = self.dim_per_head
         head_count = self.head_count
-        key_len = key.size(1)
-        query_len = query.size(1)

         def shape(x):
             """ projection """
@@ -504,9 +502,6 @@ class MultiHeadedAttention(nn.Module):
         query = shape(query)

-        key_len = key.size(2)
-        query_len = query.size(2)
-
         # 2) Calculate and scale scores.
         query = query / math.sqrt(dim_per_head)
         scores = torch.matmul(query, key.transpose(2, 3))

========================================
@@ -25,5 +25,5 @@ multi_line_output = 3
 use_parentheses = True

 [flake8]
-ignore = E203, E501, F841, W503
+ignore = E203, E501, W503
 max-line-length = 119
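
With F841 no longer in the [flake8] ignore list, the check is enforced by the project's normal flake8 run. To list only F841 findings while verifying a change like this one (paths hypothetical, adjust to the repo layout):

    flake8 --select=F841 src tests examples

--select is a standard flake8 option that restricts output to the given codes, so an empty result confirms the PR leaves no unused-variable warnings behind.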

========================================
@@ -19,7 +19,7 @@ try:
     from sklearn.metrics import matthews_corrcoef, f1_score

     _has_sklearn = True
-except (AttributeError, ImportError) as e:
+except (AttributeError, ImportError):
     _has_sklearn = False

========================================
@@ -241,8 +241,6 @@ class AlbertAttention(BertSelfAttention):
         context_layer = torch.matmul(attention_probs, value_layer)

         context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
-        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
-        reshaped_context_layer = context_layer.view(*new_context_layer_shape)

         # Should find a better way to do this
         w = (
@@ -334,9 +332,6 @@ class AlbertTransformer(nn.Module):
             # Index of the hidden group
             group_idx = int(i / (self.config.num_hidden_layers / self.config.num_hidden_groups))

-            # Index of the layer inside the group
-            layer_idx = int(i - group_idx * layers_per_group)
-
             layer_group_output = self.albert_layer_groups[group_idx](
                 hidden_states,
                 attention_mask,

========================================
@@ -629,7 +629,7 @@ class T5Stack(T5PreTrainedModel):
                 all_attentions = all_attentions + (layer_outputs[1],)  # We keep only self-attention weights for now

         hidden_states = self.final_layer_norm(hidden_states)
-        layer_output = self.dropout(hidden_states)
+        hidden_states = self.dropout(hidden_states)

         # Add last layer
         if self.output_hidden_states:
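
This hunk is more than lint hygiene: F841 flagged layer_output as unused, which exposed that the final dropout's result was being discarded rather than propagated. Rebinding to hidden_states makes the dropout actually take effect; the TF 2.0 T5 layer further down receives the same fix. In outline:

    hidden_states = self.final_layer_norm(hidden_states)
    # before: result bound to an unused name, so downstream code saw undropped activations
    layer_output = self.dropout(hidden_states)
    # after: result propagated
    hidden_states = self.dropout(hidden_states)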

========================================
@@ -122,7 +122,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
         tf_inputs = tf_model.dummy_inputs

     if tf_inputs is not None:
-        tfo = tf_model(tf_inputs, training=False)  # Make sure model is built
+        tf_model(tf_inputs, training=False)  # Make sure model is built

     # Adapt state dict - TODO remove this and update the AWS weights files instead
     # Convert old format to new format if needed from a PyTorch state_dict
@@ -187,7 +187,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
     K.batch_set_value(weight_value_tuples)

     if tf_inputs is not None:
-        tfo = tf_model(tf_inputs, training=False)  # Make sure restore ops are run
+        tf_model(tf_inputs, training=False)  # Make sure restore ops are run

     logger.info("Loaded {:,} parameters in the TF 2.0 model.".format(tf_loaded_numel))
@@ -218,7 +218,6 @@ def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, tf_inputs
     import transformers

-    tf_path = os.path.abspath(tf_checkpoint_path)
     logger.info("Loading TensorFlow weights from {}".format(tf_checkpoint_path))

     # Instantiate and load the associated TF 2.0 model
@@ -230,7 +229,7 @@ def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, tf_inputs
         tf_inputs = tf_model.dummy_inputs

     if tf_inputs is not None:
-        tfo = tf_model(tf_inputs, training=False)  # Make sure model is built
+        tf_model(tf_inputs, training=False)  # Make sure model is built

     tf_model.load_weights(tf_checkpoint_path, by_name=True)

========================================
@@ -491,7 +491,7 @@ class TFT5MainLayer(tf.keras.layers.Layer):
                 all_attentions = all_attentions + (layer_outputs[1],)

         hidden_states = self.final_layer_norm(hidden_states)
-        layer_output = self.dropout(hidden_states, training=training)
+        hidden_states = self.dropout(hidden_states, training=training)

         # Add last layer
         if self.output_hidden_states:

========================================
@@ -118,7 +118,6 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
         hidden, target = inputs
         head_logprob = 0

         if self.n_clusters == 0:
-            softmax_b = tf.get_variable("bias", [self.config.vocab_size], initializer=tf.zeros_initializer())
             output = self._logit(hidden, self.out_layers[0][0], self.out_layers[0][1], self.out_projs[0])
             if target is not None:
                 loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output)
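
Beyond being unused, the removed softmax_b line called the TF1-era tf.get_variable API from inside a tf.keras layer, where it would presumably not work under TF 2.0's eager-by-default execution, so this was doubly dead code.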

========================================
@@ -320,7 +320,7 @@ class TFPreTrainedModel(tf.keras.Model):
             # Load from a PyTorch checkpoint
            return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)

-        ret = model(model.dummy_inputs, training=False)  # build the network with dummy inputs
+        model(model.dummy_inputs, training=False)  # build the network with dummy inputs

         assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file)
         # 'by_name' allow us to do transfer learning by skipping/adding layers
@@ -333,7 +333,7 @@ class TFPreTrainedModel(tf.keras.Model):
                 "If you tried to load a TF 2.0 model from a PyTorch checkpoint, please set from_pt=True. "
             )

-        ret = model(model.dummy_inputs, training=False)  # Make sure restore ops are run
+        model(model.dummy_inputs, training=False)  # Make sure restore ops are run

         # Check if the models are the same to output loading informations
         with h5py.File(resolved_archive_file, "r") as f:
@@ -515,7 +515,7 @@ class TFSequenceSummary(tf.keras.layers.Layer):
             cls_index = inputs[1] if len(inputs) > 1 else None
             assert len(inputs) <= 2, "Too many inputs."
         else:
-            input_ids = inputs.get("input_ids")
+            hidden_states = inputs.get("hidden_states")
             cls_index = inputs.get("cls_index", None)

         if self.summary_type == "last":
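
Here too the unused-variable warning points at a genuine bug: when inputs arrive as a dict, the layer read the "input_ids" key into a binding it never used, and hidden_states was never populated from the dict at all. The fix reads the intended "hidden_states" key into the name the rest of the method consumes.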

========================================
@@ -868,8 +868,6 @@ def write_predictions_extended(
         orig_data = json.load(reader)["data"]

     qid_to_has_ans = make_qid_to_has_ans(orig_data)
-    has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
-    no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
     exact_raw, f1_raw = get_raw_scores(orig_data, all_predictions)
     out_eval = {}

========================================
@@ -284,7 +284,6 @@ class ModelTesterMixin:
             multihead_outputs = head_mask.grad

             attentions = outputs[-1]
-            hidden_states = outputs[-2]

             # Remove Nan
             for t in attentions:
@@ -590,7 +589,7 @@ class ModelTesterMixin:
                 inputs_dict["decoder_inputs_embeds"] = wte(decoder_input_ids)

             with torch.no_grad():
-                outputs = model(**inputs_dict)
+                model(**inputs_dict)


 class ConfigTester(object):

========================================
@@ -332,7 +332,7 @@ class TFModelTesterMixin:
             inputs_dict["encoder_inputs_embeds"] = self._get_embeds(wte, encoder_input_ids)
             inputs_dict["decoder_inputs_embeds"] = self._get_embeds(wte, decoder_input_ids)

-            outputs = model(inputs_dict)
+            model(inputs_dict)


 def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):

========================================
@@ -224,7 +224,6 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
         inputs = {"input_ids": input_ids, "lengths": input_lengths}

-        outputs = model(inputs)
         start_logits, end_logits = model(inputs)

         result = {

========================================
@@ -159,7 +159,6 @@ class TokenizerTesterMixin:
                 self.assertEqual(all_size_2, all_size + len(new_toks))

                 tokens = tokenizer.encode("aaaaa bbbbbb low cccccccccdddddddd l", add_special_tokens=False)
-                out_string = tokenizer.decode(tokens)

                 self.assertGreaterEqual(len(tokens), 4)
                 self.assertGreater(tokens[0], tokenizer.vocab_size - 1)
@@ -178,7 +177,6 @@ class TokenizerTesterMixin:
                 tokens = tokenizer.encode(
                     ">>>>|||<||<<|<< aaaaabbbbbb low cccccccccdddddddd <<<<<|||>|>>>>|> l", add_special_tokens=False
                 )
-                out_string = tokenizer.decode(tokens)

                 self.assertGreaterEqual(len(tokens), 6)
                 self.assertGreater(tokens[0], tokenizer.vocab_size - 1)