Mirror of https://github.com/huggingface/transformers.git
Fix: unpin flake8 and fix cs errors (#4367)

* Fix: unpin flake8 and fix cs errors

* Ok we still need to quote those
This commit is contained in:
parent c547f15a17
commit 448c467256
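Most of the hunks below are one mechanical fix, applied file by file: the newly unpinned flake8 (via pyflakes' F541 check) rejects f-strings that contain no placeholder, where the `f` prefix is dead weight at best and, at worst, hides a missing placeholder. A minimal sketch of the rule, using lines that appear in this diff:

    logger.info(f"Start encoding")           # F541: no {} field, the f does nothing
    logger.info("Start encoding")            # fixed: plain string literal
    logger.info(f"{len(data)} examples to process.")  # fine: real placeholder, keep the f-string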
@@ -478,7 +478,7 @@ def _compute_pytorch(
             dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

         if not no_speed:
-            print_fn("Going through model with sequence of shape".format(sequence.shape))
+            print_fn("Going through model with sequence of shape {}".format(sequence.shape))
             runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3)
             average_time = sum(runtimes) / float(len(runtimes)) / 3.0
             dictionary[model_name]["time"][batch_size][slice_size] = average_time
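The print_fn change above is a genuine bug fix rather than pure style: the original format string had no {} field, so .format(sequence.shape) silently discarded the shape. For illustration, with a hypothetical shape:

    "Going through model with sequence of shape".format((8, 512))
    # -> 'Going through model with sequence of shape'  (shape dropped)
    "Going through model with sequence of shape {}".format((8, 512))
    # -> 'Going through model with sequence of shape (8, 512)'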
@@ -80,7 +80,7 @@ class Distiller:

         self.mlm = params.mlm
         if self.mlm:
-            logger.info(f"Using MLM loss for LM step.")
+            logger.info("Using MLM loss for LM step.")
             self.mlm_mask_prop = params.mlm_mask_prop
             assert 0.0 <= self.mlm_mask_prop <= 1.0
             assert params.word_mask + params.word_keep + params.word_rand == 1.0
@@ -91,7 +91,7 @@ class Distiller:
                 self.pred_probs = self.pred_probs.half()
                 self.token_probs = self.token_probs.half()
         else:
-            logger.info(f"Using CLM loss for LM step.")
+            logger.info("Using CLM loss for LM step.")

         self.epoch = 0
         self.n_iter = 0
@@ -365,8 +365,8 @@ class Distiller:
             self.end_epoch()

         if self.is_master:
-            logger.info(f"Save very last checkpoint as `pytorch_model.bin`.")
-            self.save_checkpoint(checkpoint_name=f"pytorch_model.bin")
+            logger.info("Save very last checkpoint as `pytorch_model.bin`.")
+            self.save_checkpoint(checkpoint_name="pytorch_model.bin")
             logger.info("Training is finished")

     def step(self, input_ids: torch.tensor, attention_mask: torch.tensor, lm_labels: torch.tensor):
@@ -60,7 +60,7 @@ def main():
     with open(args.file_path, "r", encoding="utf8") as fp:
         data = fp.readlines()

-    logger.info(f"Start encoding")
+    logger.info("Start encoding")
     logger.info(f"{len(data)} examples to process.")

     rslt = []
@@ -93,7 +93,7 @@ if __name__ == "__main__":
     elif args.model_type == "gpt2":
         for w in ["weight", "bias"]:
             compressed_sd[f"{prefix}.ln_f.{w}"] = state_dict[f"{prefix}.ln_f.{w}"]
-        compressed_sd[f"lm_head.weight"] = state_dict[f"lm_head.weight"]
+        compressed_sd["lm_head.weight"] = state_dict["lm_head.weight"]

     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")
@@ -37,7 +37,7 @@ if __name__ == "__main__":
         model = BertForMaskedLM.from_pretrained(args.model_name)
         prefix = "bert"
     else:
-        raise ValueError(f'args.model_type should be "bert".')
+        raise ValueError('args.model_type should be "bert".')

     state_dict = model.state_dict()
     compressed_sd = {}
@@ -78,12 +78,12 @@ if __name__ == "__main__":
             ]
         std_idx += 1

-    compressed_sd[f"vocab_projector.weight"] = state_dict[f"cls.predictions.decoder.weight"]
-    compressed_sd[f"vocab_projector.bias"] = state_dict[f"cls.predictions.bias"]
+    compressed_sd["vocab_projector.weight"] = state_dict["cls.predictions.decoder.weight"]
+    compressed_sd["vocab_projector.bias"] = state_dict["cls.predictions.bias"]
     if args.vocab_transform:
         for w in ["weight", "bias"]:
             compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"]
             compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"]

     print(f"N layers selected for distillation: {std_idx}")
     print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}")
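Note that the vocab_transform and vocab_layer_norm assignments are unchanged context lines that keep their f prefixes: {w} is a real placeholder there, and stripping the prefix would turn the key into the literal string "cls.predictions.transform.dense.{w}" and raise a KeyError. Compare:

    w = "weight"
    f"cls.predictions.transform.dense.{w}"  # -> 'cls.predictions.transform.dense.weight'
    "cls.predictions.transform.dense.{w}"   # literal braces; no such key in a state_dict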
@@ -273,7 +273,7 @@ def main():
        token_probs = None

    train_lm_seq_dataset = LmSeqsDataset(params=args, data=data)
-    logger.info(f"Data loader created.")
+    logger.info("Data loader created.")

    # STUDENT #
    logger.info(f"Loading student config from {args.student_config}")
@@ -288,7 +288,7 @@ def main():

    if args.n_gpu > 0:
        student.to(f"cuda:{args.local_rank}")
-    logger.info(f"Student loaded.")
+    logger.info("Student loaded.")

    # TEACHER #
    teacher = teacher_model_class.from_pretrained(args.teacher_name, output_hidden_states=True)
@@ -36,5 +36,5 @@ multi_line_output = 3
 use_parentheses = True

 [flake8]
-ignore = E203, E501, W503
+ignore = E203, E501, E741, W503
 max-line-length = 119
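E741 is pycodestyle's "ambiguous variable name" warning for single-character names like l, O, or I that are easily confused with digits; adding it to the ignore list is presumably what allows flake8 to be unpinned without new failures on existing code. A minimal example of code that would trip it:

    l = [1, 2, 3]  # E741: 'l' is easily mistaken for '1' or 'I'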
setup.py
@@ -79,7 +79,7 @@ extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rt
 extras["quality"] = [
     "black",
     "isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort",
-    "flake8==3.7.9",
+    "flake8",
 ]
 extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"]

@@ -226,7 +226,7 @@ def lmap(f, x) -> List:
 def fetch_test_set(test_set_url):
     import wget

-    fname = wget.download(test_set_url, f"opus_test.txt")
+    fname = wget.download(test_set_url, "opus_test.txt")
     lns = Path(fname).open().readlines()
     src = lmap(str.strip, lns[::4])
     gold = lmap(str.strip, lns[1::4])
@@ -114,7 +114,7 @@ class GlueDataset(Dataset):
             torch.save(self.features, cached_features_file)
             # ^ This seems to take a lot of time so I want to investigate why and how we can improve.
             logger.info(
-                f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+                "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
             )

     def __len__(self):
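This hunk (and the TextDataset one after it) fixes a subtler variant: the string uses logging's lazy %-style placeholders and passes the values as separate arguments, so the f prefix contributed nothing; there are no {} fields, and the %s/%.3f substitution is performed by the logging machinery, not by the f-string. A sketch of the intended style, with hypothetical values:

    import logging, time

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    path, start = "/tmp/cache.bin", time.time()  # hypothetical
    # Lazy %-formatting: interpolation happens only if the record is actually emitted.
    logger.info("Saving features into cached file %s [took %.3f s]", path, time.time() - start)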
@@ -65,7 +65,7 @@ class TextDataset(Dataset):
         with open(cached_features_file, "wb") as handle:
             pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)
         logger.info(
-            f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
+            "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start
         )

     def __len__(self):
@@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from itertools import chain
 from os.path import abspath, exists
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

 import numpy as np

@@ -58,6 +58,10 @@ if is_torch_available():
         AutoModelWithLMHead,
     )

+if TYPE_CHECKING:
+    from .modeling_utils import PreTrainedModel
+    from .modeling_tf_utils import TFPreTrainedModel
+

 logger = logging.getLogger(__name__)

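The two TYPE_CHECKING hunks (this one and the test-file one below) use the standard typing pattern for type-only imports: typing.TYPE_CHECKING is False at runtime and True only while a static checker analyses the module, so the names can appear in quoted annotations without the runtime import cost or a hard dependency on both backends. A minimal sketch of the pattern, with hypothetical module names:

    from typing import TYPE_CHECKING, Dict

    if TYPE_CHECKING:
        # Evaluated only by type checkers (mypy, pyright), never at runtime.
        from heavy_backend import HeavyModel  # hypothetical module

    def count_params(models: Dict[str, "HeavyModel"]) -> int:
        # Quoted annotations are resolved lazily, so heavy_backend need
        # not be importable when this code actually runs.
        return len(models)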
@@ -19,11 +19,21 @@ import pickle
 import shutil
 import tempfile
 from collections import OrderedDict
-from typing import Dict, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Tuple, Union

 from tests.utils import require_tf, require_torch


+if TYPE_CHECKING:
+    from transformers import (
+        PretrainedConfig,
+        PreTrainedTokenizer,
+        PreTrainedTokenizerFast,
+        PreTrainedModel,
+        TFPreTrainedModel,
+    )
+
+
 def merge_model_tokenizer_mappings(
     model_mapping: Dict["PretrainedConfig", Union["PreTrainedModel", "TFPreTrainedModel"]],
     tokenizer_mapping: Dict["PretrainedConfig", Tuple["PreTrainedTokenizer", "PreTrainedTokenizerFast"]],