# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TF general model utils."""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import logging
import os

import tensorflow as tf

from .configuration_utils import PretrainedConfig
from .file_utils import cached_path, WEIGHTS_NAME, TF_WEIGHTS_NAME

logger = logging.getLogger(__name__)


class TFPreTrainedModel(tf.keras.Model):
    r""" Base class for all TF models.

        :class:`~pytorch_transformers.TFPreTrainedModel` takes care of storing the configuration of the models and handles methods for loading/downloading/saving models
        as well as a few methods common to all models to (i) resize the input embeddings and (ii) prune heads in the self-attention heads.

        Class attributes (overridden by derived classes):
            - ``config_class``: a class derived from :class:`~pytorch_transformers.PretrainedConfig` to use as configuration class for this model architecture.
            - ``pretrained_model_archive_map``: a python ``dict`` with `short-cut-names` (string) as keys and `url` (string) of associated pretrained weights as values.
            - ``load_pt_weights``: a python ``method`` for loading a PyTorch checkpoint in a TF 2.0 model, taking as arguments:

                - ``model``: an instance of the relevant subclass of :class:`~pytorch_transformers.TFPreTrainedModel`,
                - ``config``: an instance of the relevant subclass of :class:`~pytorch_transformers.PretrainedConfig`,
                - ``path``: a path (string) to the PyTorch checkpoint.

            - ``base_model_prefix``: a string indicating the attribute associated to the base model in derived classes of the same architecture adding modules on top of the base model.
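
        Example of the class attributes a hypothetical derived class could define (an illustrative
        sketch only; ``TFMyModel``, ``MyConfig``, ``load_my_pt_weights_in_tf_model`` and the URL are
        made-up names)::

            class TFMyModel(TFPreTrainedModel):
                config_class = MyConfig
                pretrained_model_archive_map = {'my-model-base': 'https://example.com/my-model-base-tf_model.h5'}
                load_pt_weights = load_my_pt_weights_in_tf_model
                base_model_prefix = "my_model"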
    """
    config_class = None
    pretrained_model_archive_map = {}
    load_pt_weights = lambda model, config, path: None
    base_model_prefix = ""

    def __init__(self, config, *inputs, **kwargs):
        super(TFPreTrainedModel, self).__init__()
        if not isinstance(config, PretrainedConfig):
            raise ValueError(
                "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
                "To create a model from a pretrained model use "
                "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
                    self.__class__.__name__, self.__class__.__name__
                ))
        # Save config in model
        self.config = config

    def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
        """ Build a resized Embedding Module from a provided token Embedding Module.
            Increasing the size will add newly initialized vectors at the end.
            Reducing the size will remove vectors from the end.

        Args:
            new_num_tokens: (`optional`) int
                New number of tokens in the embedding matrix.
                Increasing the size will add newly initialized vectors at the end. Reducing the size will remove vectors from the end.
                If not provided or None: return the provided token Embedding Module.

        Return:
            Pointer to the resized embedding module, or the old embedding module if new_num_tokens is None
        """
        raise NotImplementedError

    def _tie_or_clone_weights(self, first_module, second_module):
        """ Tie or clone module weights depending on whether we are using TorchScript or not
        """
        raise NotImplementedError

    def resize_token_embeddings(self, new_num_tokens=None):
        """ Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
            Takes care of tying embedding weights afterwards if the model class has a `tie_weights()` method.

        Arguments:

            new_num_tokens: (`optional`) int:
                New number of tokens in the embedding matrix. Increasing the size will add newly initialized vectors at the end. Reducing the size will remove vectors from the end.
                If not provided or None: does nothing and just returns a pointer to the input tokens embedding module of the model.

        Return:
            Pointer to the input tokens embedding module of the model
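
        Example (an illustrative usage sketch; ``TFBertModel`` stands in for any concrete subclass and the added token count is arbitrary)::

            model = TFBertModel.from_pretrained('bert-base-uncased')
            # e.g. after adding two tokens to the tokenizer vocabulary:
            model.resize_token_embeddings(new_num_tokens=model.config.vocab_size + 2)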
        """
        raise NotImplementedError

    def prune_heads(self, heads_to_prune):
        """ Prunes heads of the base model.

        Arguments:

            heads_to_prune: dict with keys being selected layer indices (`int`) and associated values being the list of heads to prune in said layer (list of `int`).
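
        Example (an illustrative sketch; the layer and head indices are arbitrary)::

            # prune heads 0 and 2 in layer 0, and head 1 in layer 2
            model.prune_heads({0: [0, 2], 2: [1]})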
        """
        raise NotImplementedError

    def save_pretrained(self, save_directory):
        """ Save a model and its configuration file to a directory, so that it
            can be re-loaded using the :func:`~pytorch_transformers.TFPreTrainedModel.from_pretrained` class method.
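
        Example (an illustrative round-trip sketch; ``TFBertModel`` and the directory path are placeholders)::

            model = TFBertModel.from_pretrained('bert-base-uncased')
            model.save_pretrained('./my_model_directory/')
            model = TFBertModel.from_pretrained('./my_model_directory/')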
        """
        raise NotImplementedError

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r"""Instantiate a pretrained TF 2.0 model from a pre-trained model configuration.

        The weights are loaded for inference by default; dropout and other training-specific behaviour is controlled at call time through the ``training`` argument (e.g. ``model(inputs, training=False)``).

        The warning ``Weights from XXX not initialized from pretrained model`` means that the weights of XXX do not come pre-trained with the rest of the model.
        It is up to you to train those weights with a downstream fine-tuning task.

        The warning ``Weights from XXX not used in YYY`` means that the layer XXX is not used by YYY, therefore those weights are discarded.

        Parameters:
            pretrained_model_name_or_path: either:

                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
                - a path to a `directory` containing model weights saved using :func:`~pytorch_transformers.TFPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `PyTorch state_dict save file` (e.g. `./pt_model/pytorch_model.bin`). In this case, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the PyTorch checkpoint to a TensorFlow model using the provided conversion scripts and loading the TensorFlow model afterwards.

            model_args: (`optional`) Sequence of positional arguments:
                All remaining positional arguments will be passed to the underlying model's ``__init__`` method

            config: (`optional`) instance of a class derived from :class:`~pytorch_transformers.PretrainedConfig`:
                Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:

                - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
                - the model was saved using :func:`~pytorch_transformers.TFPreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
                - the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.

            from_pt: (`optional`) boolean, default False:
                Load the model weights from a PyTorch state_dict save file (see docstring of pretrained_model_name_or_path argument).

            cache_dir: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.

            force_download: (`optional`) boolean, default False:
                Force to (re-)download the model weights and configuration files and override the cached versions if they exist.

            proxies: (`optional`) dict, default None:
                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
                The proxies are used on each request.

            output_loading_info: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.

            kwargs: (`optional`) Remaining dictionary of keyword arguments:
                Can be used to update the configuration object (after it is loaded) and initialize the model (e.g. ``output_attentions=True``). Behaves differently depending on whether a `config` is provided or automatically loaded:

                - If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done)
                - If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~pytorch_transformers.PretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function.

        Examples::

            model = TFBertModel.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            model = TFBertModel.from_pretrained('./test/saved_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            model = TFBertModel.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
            assert model.config.output_attentions == True
            # Loading from a PyTorch checkpoint file instead of a TF 2.0 model (slower)
            config = BertConfig.from_json_file('./pt_model/my_pt_model_config.json')
            model = TFBertModel.from_pretrained('./pt_model/pytorch_model.bin', from_pt=True, config=config)

        """
        config = kwargs.pop('config', None)
        cache_dir = kwargs.pop('cache_dir', None)
        from_pt = kwargs.pop('from_pt', False)
        force_download = kwargs.pop('force_download', False)
        proxies = kwargs.pop('proxies', None)
        output_loading_info = kwargs.pop('output_loading_info', False)

        # Load config
        if config is None:
            config, model_kwargs = cls.config_class.from_pretrained(
                pretrained_model_name_or_path, *model_args,
                cache_dir=cache_dir, return_unused_kwargs=True,
                force_download=force_download,
                **kwargs
            )
        else:
            model_kwargs = kwargs

        # Load model
        if pretrained_model_name_or_path in cls.pretrained_model_archive_map:
            archive_file = cls.pretrained_model_archive_map[pretrained_model_name_or_path]
        elif os.path.isdir(pretrained_model_name_or_path):
            if from_pt:
                # Load from a PyTorch checkpoint
                archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)
            else:
                archive_file = os.path.join(pretrained_model_name_or_path, TF_WEIGHTS_NAME)
        else:
            archive_file = pretrained_model_name_or_path
        # redirect to the cache, if necessary
        try:
            resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir, force_download=force_download, proxies=proxies)
        except EnvironmentError:
            if pretrained_model_name_or_path in cls.pretrained_model_archive_map:
                logger.error(
                    "Couldn't reach server at '{}' to download pretrained weights.".format(
                        archive_file))
            else:
                logger.error(
                    "Model name '{}' was not found in model name list ({}). "
                    "We assumed '{}' was a path or url but couldn't find any file "
                    "associated to this path or url.".format(
                        pretrained_model_name_or_path,
                        ', '.join(cls.pretrained_model_archive_map.keys()),
                        archive_file))
            return None
        if resolved_archive_file == archive_file:
            logger.info("loading weights file {}".format(archive_file))
        else:
            logger.info("loading weights file {} from cache at {}".format(
                archive_file, resolved_archive_file))

        # Instantiate model.
        model = cls(config, *model_args, **model_kwargs)

        if from_pt:
            # Load from a PyTorch checkpoint
            return cls.load_pt_weights(model, config, resolved_archive_file)

        inputs = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]])
        ret = model(inputs, training=False)  # build the network with dummy inputs
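        # Note: subclassed Keras models create their variables lazily, so the forward pass above is
        # what actually builds the weights that `load_weights` can then match by name.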

        # 'by_name' allows us to do transfer learning by skipping/adding layers
        # see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1339-L1357
        model.load_weights(resolved_archive_file, by_name=True)

        ret = model(inputs, training=False)  # Make sure restore ops are run

        # if hasattr(model, 'tie_weights'):
        #     model.tie_weights()  # TODO make sure word embedding weights are still tied

        if output_loading_info:
            # `load_weights(..., by_name=True)` does not report skipped or unmatched variables,
            # so no missing/unexpected keys or error messages can be collected here yet.
            loading_info = {"missing_keys": [], "unexpected_keys": [], "error_msgs": []}
            return model, loading_info

        return model