fix typo/presentation

VictorSanh 2019-06-01 17:42:00 -04:00
parent 8f97f6c57f
commit cdf0f2fec3
2 changed files with 97 additions and 97 deletions

View File

@@ -1,27 +1,27 @@
from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer
from pytorch_pretrained_bert.modeling_gpt2 import (
    GPT2Model,
    GPT2LMHeadModel,
    GPT2DoubleHeadsModel
)

# A lot of models share the same param doc. Use a decorator
# to save typing
gpt2_docstring = """
    Params:
        pretrained_model_name_or_path: either:
            - a str with the name of a pre-trained model to load selected in the list of:
                . `gpt2`
            - a path or url to a pretrained model archive containing:
                . `gpt2_config.json` a configuration file for the model
                . `pytorch_model.bin` a PyTorch dump of a GPT2Model instance
            - a path or url to a pretrained model archive containing:
                . `gpt2_config.json` a configuration file for the model
                . a TensorFlow checkpoint with trained weights
        from_tf: should we load the weights from a locally saved TensorFlow checkpoint
        cache_dir: an optional path to a folder in which the pre-trained models will be cached.
        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
        *inputs, **kwargs: additional input for the specific GPT-2 class
"""
@@ -35,27 +35,27 @@ def _append_from_pretrained_docstring(docstr):
def gpt2Tokenizer(*args, **kwargs):
    """
    Instantiate a GPT-2 BPE tokenizer for OpenAI GPT-2 from a pre-trained/customized vocab file.
    Peculiarities:
        - Byte-level BPE

    Args:
        pretrained_model_name_or_path: Path to pretrained model archive
                                       or one of pre-trained vocab configs below.
                                           * gpt2
    Keyword args:
        special_tokens: Special tokens in vocabulary that are not pretrained ([SEP], [CLS]...)
                        Default: None
        max_len: An artificial maximum length to truncate tokenized sequences to;
                 Effective maximum length is always the minimum of this
                 value (if specified) and the underlying GPT-2 model's
                 sequence length.
                 Default: None

    Example:
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')

        >>> text = "Who was Jim Henson ?"
        >>> indexed_tokens = tokenizer.encode(text)
    """
    tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
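Because the tokenizer is byte-level BPE, encoding is lossless and decoding the ids reproduces the input exactly; a short round-trip sketch, assuming `tokenizer` was loaded as in the example above:

text = "Who was Jim Henson ?"
ids = tokenizer.encode(text)
assert tokenizer.decode(ids) == text  # byte-level BPE round-trips the text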
@@ -66,31 +66,31 @@ def gpt2Tokenizer(*args, **kwargs):
def gpt2Model(*args, **kwargs):
    """
    gpt2Model is the basic OpenAI GPT-2 Transformer model, based on
    identical stacked masked self-attention blocks and pre-trained
    on a large-scale dataset using a language modeling signal.

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')

        # Prepare tokenized input
        >>> text_1 = "Who was Jim Henson ?"
        >>> text_2 = "Jim Henson was a puppeteer"
        >>> indexed_tokens_1 = tokenizer.encode(text_1)
        >>> indexed_tokens_2 = tokenizer.encode(text_2)
        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])

        # Load gpt2Model
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2')
        >>> model.eval()

        # Predict hidden states features for each layer
        # past can be used to reuse precomputed hidden states in subsequent predictions
        >>> with torch.no_grad():
                hidden_states_1, past = model(tokens_tensor_1)
                hidden_states_2, past = model(tokens_tensor_2, past=past)
    """
    model = GPT2Model.from_pretrained(*args, **kwargs)
    return model
@@ -100,34 +100,34 @@ def gpt2Model(*args, **kwargs):
def gpt2LMHeadModel(*args, **kwargs):
    """
    gpt2LMHeadModel is the OpenAI GPT-2 Transformer model with the
    tied (pre-trained) language modeling head on top.

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')

        # Prepare tokenized input
        >>> text_1 = "Who was Jim Henson ?"
        >>> text_2 = "Jim Henson was a puppeteer"
        >>> indexed_tokens_1 = tokenizer.encode(text_1)
        >>> indexed_tokens_2 = tokenizer.encode(text_2)
        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])

        # Load gpt2LMHeadModel
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2LMHeadModel', 'gpt2')
        >>> model.eval()

        # Predict the logits for each token
        # past can be used to reuse precomputed hidden states in subsequent predictions
        >>> with torch.no_grad():
                predictions_1, past = model(tokens_tensor_1)
                predictions_2, past = model(tokens_tensor_2, past=past)

        # Get the predicted last token
        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
        >>> predicted_token = tokenizer.decode([predicted_index])
        >>> assert predicted_token == ' who'
    """
    model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
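Building on the docstring example above, a minimal greedy-decoding sketch that carries `past` across steps; it assumes `tokenizer` and `model` were loaded via `torch.hub` as shown and the `(logits, past)` return convention of this library version (the 10-step horizon is illustrative):

import torch

text = "Who was Jim Henson ?"
tokens_tensor = torch.tensor([tokenizer.encode(text)])

past, generated = None, []
with torch.no_grad():
    for _ in range(10):
        # Each call returns logits for the fed tokens and the updated cache
        predictions, past = model(tokens_tensor, past=past)
        predicted_index = torch.argmax(predictions[0, -1, :]).item()
        generated.append(predicted_index)
        # Feed only the new token; `past` carries the earlier context
        tokens_tensor = torch.tensor([[predicted_index]])

print(tokenizer.decode(generated))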
@@ -138,19 +138,19 @@ def gpt2LMHeadModel(*args, **kwargs):
def gpt2DoubleHeadsModel(*args, **kwargs):
    """
    gpt2DoubleHeadsModel is the OpenAI GPT-2 Transformer model with the
    tied (pre-trained) language modeling head and a multiple choice
    classification head (only initialized, not pre-trained).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')

        # Prepare tokenized input
        >>> text = "Who was Jim Henson ?"
        >>> indexed_tokens = tokenizer.encode(text)
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> mc_token_ids = torch.LongTensor([[len(indexed_tokens) - 1]])  # index of the token whose hidden state feeds the classification head

        # Load gpt2DoubleHeadsModel
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2')

View File

@@ -1,7 +1,7 @@
from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer
from pytorch_pretrained_bert.modeling_transfo_xl import (
    TransfoXLModel,
    TransfoXLLMHeadModel
)

# A lot of models share the same param doc. Use a decorator
@@ -11,20 +11,20 @@ transformer_xl_docstring = """
        - you don't need to specify positioning embeddings indices
        - the tokens in the vocabulary have to be sorted by decreasing frequency

    Params:
        pretrained_model_name_or_path: either:
            - a str with the name of a pre-trained model to load selected in the list of:
                . `transfo-xl-wt103`
            - a path or url to a pretrained model archive containing:
                . `transfo_xl_config.json` a configuration file for the model
                . `pytorch_model.bin` a PyTorch dump of a TransfoXLModel instance
            - a path or url to a pretrained model archive containing:
                . `transfo_xl_config.json` a configuration file for the model
                . `model.chkpt` a TensorFlow checkpoint
        from_tf: should we load the weights from a locally saved TensorFlow checkpoint
        cache_dir: an optional path to a folder in which the pre-trained models will be cached.
        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
        *inputs, **kwargs: additional input for the specific TransformerXL class
"""
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
                * transfo-xl-wt103

    Example:
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')

        >>> text = "Who was Jim Henson ?"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    """
    tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
    return tokenizer
@@ -60,33 +60,33 @@ def transformerXLTokenizer(*args, **kwargs):
def transformerXLModel(*args, **kwargs):
    """
    transformerXLModel is the basic Transformer-XL model, based on
    stacked self-attention blocks with segment-level recurrence and pre-trained
    on a large-scale dataset using a language modeling signal.

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')

        # Prepare tokenized input
        >>> text_1 = "Who was Jim Henson ?"
        >>> text_2 = "Jim Henson was a puppeteer"
        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])

        # Load transformerXLModel
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLModel', 'transfo-xl-wt103')
        >>> model.eval()

        # Predict hidden states features for each layer
        # We can re-use the memory cells in a subsequent call to attend a longer context
        >>> with torch.no_grad():
                hidden_states_1, mems_1 = model(tokens_tensor_1)
                hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
    """
    model = TransfoXLModel.from_pretrained(*args, **kwargs)
    return model
@@ -96,37 +96,37 @@ def transformerXLModel(*args, **kwargs):
def transformerXLLMHeadModel(*args, **kwargs):
    """
    transformerXLLMHeadModel is the Transformer-XL model with the
    tied (pre-trained) language modeling head on top.

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')

        # Prepare tokenized input
        >>> text_1 = "Who was Jim Henson ?"
        >>> text_2 = "Jim Henson was a puppeteer"
        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])

        # Load transformerXLLMHeadModel
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
        >>> model.eval()

        # Predict the next-token scores
        # We can re-use the memory cells in a subsequent call to attend a longer context
        >>> with torch.no_grad():
                predictions_1, mems_1 = model(tokens_tensor_1)
                predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)

        # Get the predicted last token
        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
        >>> assert predicted_token == 'who'
    """
    model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
    return model
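To make the role of `mems` concrete, here is a minimal sketch of scoring a longer text in fixed-size chunks while carrying the memory forward; it assumes `tokenizer` and `model` were loaded as in the example above, and the chunk length is illustrative:

import torch

text = "Who was Jim Henson ? Jim Henson was a puppeteer"
tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))

chunk_size, mems = 4, None  # illustrative chunk length
with torch.no_grad():
    for start in range(0, len(tokens), chunk_size):
        chunk = torch.tensor([tokens[start:start + chunk_size]])
        # Each chunk attends to the hidden states of earlier chunks via mems
        predictions, mems = model(chunk, mems=mems)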