Fix: unpin flake8 and fix cs errors (#4367)

* Fix: unpin flake8 and fix cs errors

* Ok we still need to quote those
Julien Chaumond 2020-05-14 13:14:26 -04:00 committed by GitHub
parent c547f15a17
commit 448c467256
13 changed files with 35 additions and 21 deletions

View File

@@ -478,7 +478,7 @@ def _compute_pytorch(
             dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"
         if not no_speed:
-            print_fn("Going through model with sequence of shape".format(sequence.shape))
+            print_fn("Going through model with sequence of shape {}".format(sequence.shape))
             runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
             average_time = sum(runtimes) / float(len(runtimes)) / 3.0
             dictionary[model_name]["time"][batch_size][slice_size] = average_time

View File

@@ -80,7 +80,7 @@ class Distiller:
         self.mlm = params.mlm
         if self.mlm:
-            logger.info(f"Using MLM loss for LM step.")
+            logger.info("Using MLM loss for LM step.")
             self.mlm_mask_prop = params.mlm_mask_prop
             assert 0.0 <= self.mlm_mask_prop <= 1.0
             assert params.word_mask + params.word_keep + params.word_rand == 1.0
@@ -91,7 +91,7 @@
                 self.pred_probs = self.pred_probs.half()
                 self.token_probs = self.token_probs.half()
         else:
-            logger.info(f"Using CLM loss for LM step.")
+            logger.info("Using CLM loss for LM step.")

         self.epoch = 0
         self.n_iter = 0
@@ -365,8 +365,8 @@ class Distiller:
             self.end_epoch()

         if self.is_master:
-            logger.info(f"Save very last checkpoint as `pytorch_model.bin`.")
-            self.save_checkpoint(checkpoint_name=f"pytorch_model.bin")
+            logger.info("Save very last checkpoint as `pytorch_model.bin`.")
+            self.save_checkpoint(checkpoint_name="pytorch_model.bin")
             logger.info("Training is finished")

     def step(self, input_ids: torch.tensor, attention_mask: torch.tensor, lm_labels: torch.tensor):

View File

@@ -60,7 +60,7 @@ def main():
     with open(args.file_path, "r", encoding="utf8") as fp:
         data = fp.readlines()

-    logger.info(f"Start encoding")
+    logger.info("Start encoding")
     logger.info(f"{len(data)} examples to process.")
     rslt = []

View File

@@ -93,7 +93,7 @@ if __name__ == "__main__":
     elif args.model_type == "gpt2":
         for w in ["weight", "bias"]:
             compressed_sd[f"{prefix}.ln_f.{w}"] = state_dict[f"{prefix}.ln_f.{w}"]
-        compressed_sd[f"lm_head.weight"] = state_dict[f"lm_head.weight"]
+        compressed_sd["lm_head.weight"] = state_dict["lm_head.weight"]

     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")

View File

@@ -37,7 +37,7 @@ if __name__ == "__main__":
         model = BertForMaskedLM.from_pretrained(args.model_name)
         prefix = "bert"
     else:
-        raise ValueError(f'args.model_type should be "bert".')
+        raise ValueError('args.model_type should be "bert".')

     state_dict = model.state_dict()
     compressed_sd = {}
@@ -78,12 +78,12 @@ if __name__ == "__main__":
         ]
         std_idx += 1
-    compressed_sd[f"vocab_projector.weight"] = state_dict[f"cls.predictions.decoder.weight"]
-    compressed_sd[f"vocab_projector.bias"] = state_dict[f"cls.predictions.bias"]
+    compressed_sd["vocab_projector.weight"] = state_dict["cls.predictions.decoder.weight"]
+    compressed_sd["vocab_projector.bias"] = state_dict["cls.predictions.bias"]
     if args.vocab_transform:
         for w in ["weight", "bias"]:
             compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"]
             compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"]

     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")

View File

@@ -273,7 +273,7 @@ def main():
     token_probs = None
     train_lm_seq_dataset = LmSeqsDataset(params=args, data=data)
-    logger.info(f"Data loader created.")
+    logger.info("Data loader created.")

     # STUDENT #
     logger.info(f"Loading student config from {args.student_config}")
@@ -288,7 +288,7 @@ def main():
     if args.n_gpu > 0:
         student.to(f"cuda:{args.local_rank}")
-    logger.info(f"Student loaded.")
+    logger.info("Student loaded.")

     # TEACHER #
     teacher = teacher_model_class.from_pretrained(args.teacher_name, output_hidden_states=True)

View File

@@ -36,5 +36,5 @@ multi_line_output = 3
 use_parentheses = True

 [flake8]
-ignore = E203, E501, W503
+ignore = E203, E501, E741, W503
 max-line-length = 119

View File

@@ -79,7 +79,7 @@ extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rt
 extras["quality"] = [
     "black",
     "isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort",
-    "flake8==3.7.9",
+    "flake8",
 ]

 extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"]

View File

@@ -226,7 +226,7 @@ def lmap(f, x) -> List:
 def fetch_test_set(test_set_url):
     import wget

-    fname = wget.download(test_set_url, f"opus_test.txt")
+    fname = wget.download(test_set_url, "opus_test.txt")
     lns = Path(fname).open().readlines()
     src = lmap(str.strip, lns[::4])
     gold = lmap(str.strip, lns[1::4])

View File

@@ -114,7 +114,7 @@ class GlueDataset(Dataset):
                 torch.save(self.features, cached_features_file)
                 # ^ This seems to take a lot of time so I want to investigate why and how we can improve.
                 logger.info(
-                    f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                    "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
                 )

     def __len__(self):

View File

@@ -65,7 +65,7 @@ class TextDataset(Dataset):
             with open(cached_features_file, "wb") as handle:
                 pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)
             logger.info(
-                f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
             )

     def __len__(self):

View File

@@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from itertools import chain
 from os.path import abspath, exists
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

 import numpy as np
@@ -58,6 +58,10 @@ if is_torch_available():
         AutoModelWithLMHead,
     )

+if TYPE_CHECKING:
+    from .modeling_utils import PreTrainedModel
+    from .modeling_tf_utils import TFPreTrainedModel

 logger = logging.getLogger(__name__)

View File

@@ -19,11 +19,21 @@ import pickle
 import shutil
 import tempfile
 from collections import OrderedDict
-from typing import Dict, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Tuple, Union

 from tests.utils import require_tf, require_torch

+if TYPE_CHECKING:
+    from transformers import (
+        PretrainedConfig,
+        PreTrainedTokenizer,
+        PreTrainedTokenizerFast,
+        PreTrainedModel,
+        TFPreTrainedModel,
+    )

 def merge_model_tokenizer_mappings(
     model_mapping: Dict["PretrainedConfig", Union["PreTrainedModel", "TFPreTrainedModel"]],
     tokenizer_mapping: Dict["PretrainedConfig", Tuple["PreTrainedTokenizer", "PreTrainedTokenizerFast"]],