mirror of https://github.com/huggingface/transformers.git
synced 2025-08-01 10:41:07 +06:00

improve docstring (#3327)

This commit is contained in:
parent d7f98cd3ef
commit ddb10c6447
@@ -486,15 +486,20 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):

             Parameters:

-            input_ids: (`optional`) `torch.LongTensor` of shape `(batch_size, sequence_length)`
+            input_ids: (`optional`) `tf.Tensor` of `dtype=tf.int32` of shape `(batch_size, sequence_length)`
                 The sequence used as a prompt for the generation. If `None` the method initializes
                 it as an empty `torch.LongTensor` of shape `(1,)`.

             max_length: (`optional`) int
                 The max length of the sequence to be generated. Between 1 and infinity. Default to 20.

+            min_length: (`optional`) int
+                The min length of the sequence to be generated. Between 0 and infinity. Default to 0.
+
             do_sample: (`optional`) bool
-                If set to `False` greedy decoding is used. Otherwise sampling is used. Defaults to `False`.
+                If set to `False` greedy decoding is used. Otherwise sampling is used. Defaults to `False` as defined in `configuration_utils.PretrainedConfig`.
+
+            early_stopping: (`optional`) bool
+                if set to `True` beam search is stopped when at least `num_beams` sentences finished per batch. Defaults to `False` as defined in `configuration_utils.PretrainedConfig`.

             num_beams: (`optional`) int
                 Number of beams for beam search. Must be between 1 and infinity. 1 means no beam search. Default to 1.
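A minimal sketch of how the newly documented TF arguments combine, assuming the API exactly as described in this hunk (`min_length` together with sampling) and `distilgpt2` weights:

    from transformers import AutoTokenizer, TFAutoModelWithLMHead

    tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
    model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')

    input_ids = tokenizer.encode('The dog', return_tensors='tf')  # tf.Tensor of dtype tf.int32
    # sample at least 10 and at most 40 tokens; min_length blocks early termination
    outputs = model.generate(input_ids=input_ids, max_length=40, min_length=10,
                             do_sample=True, temperature=0.7)
    print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))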
@@ -514,46 +519,64 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):

             bos_token_id: (`optional`) int
                 Beginning of sentence token if no prompt is provided. Default to 0.

+            pad_token_id: (`optional`) int
+                Pad token. Defaults to pad_token_id as defined in the models config.
+
             eos_token_ids: (`optional`) int or list of int
                 End of sequence token or list of tokens to stop the generation. Default to 0.

             length_penalty: (`optional`) float
                 Exponential penalty to the length. Default to 1.

+            no_repeat_ngram_size: (`optional`) int
+                If set to int > 0, all ngrams of size `no_repeat_ngram_size` can only occur once.
+
             num_return_sequences: (`optional`) int
                 The number of independently computed returned sequences for each element in the batch. Default to 1.

+            attention_mask (`optional`) obj: `tf.Tensor` with `dtype=tf.int32` of same shape as `input_ids`
+                Mask to avoid performing attention on padding token indices.
+                Mask values selected in ``[0, 1]``:
+                ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
+                Defaults to `None`.
+
+                `What are attention masks? <../glossary.html#attention-mask>`__
+
+            decoder_start_token_id=None: (`optional`) int
+                If an encoder-decoder model starts decoding with a different token than BOS.
+                Defaults to `None` and is changed to `BOS` later.

         Return:

-            output: `torch.LongTensor` of shape `(batch_size * num_return_sequences, sequence_length)`
+            output: `tf.Tensor` of `dtype=tf.int32` shape `(batch_size * num_return_sequences, sequence_length)`
                 sequence_length is either equal to max_length or shorter if all batches finished early due to the `eos_token_id`

         Examples::

             tokenizer = AutoTokenizer.from_pretrained('distilgpt2')   # Initialize tokenizer
-            model = AutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from S3 and cache.
-            outputs = model.generate(max_length=40, bos_token_id=tokenizer.bos_token_id, eos_token_ids=tokenizer.eos_token_id, do_sample=False)  # do greedy decoding
+            outputs = model.generate(max_length=40)  # do greedy decoding
             print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))

             tokenizer = AutoTokenizer.from_pretrained('openai-gpt')   # Initialize tokenizer
-            model = AutoModelWithLMHead.from_pretrained('openai-gpt')    # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('openai-gpt')    # Download model and configuration from S3 and cache.
             input_context = 'The dog'
-            input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0)  # encode input context
+            input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5)  # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog'
             for i in range(3): #  3 output sequences were generated
                 print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))

             tokenizer = AutoTokenizer.from_pretrained('distilgpt2')   # Initialize tokenizer
-            model = AutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from S3 and cache.
             input_context = 'The dog'
-            input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0)  # encode input context
+            input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
-            outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, bos_token_id=tokenizer.bos_token_id, pad_token_id=tokenizer.pad_token_id, eos_token_ids=tokenizer.eos_token_id, num_return_sequences=3)  # 3 generate sequences using by sampling
+            outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3)  # 3 generate sequences using by sampling
             for i in range(3): #  3 output sequences were generated
                 print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))

             tokenizer = AutoTokenizer.from_pretrained('ctrl')   # Initialize tokenizer
-            model = AutoModelWithLMHead.from_pretrained('ctrl')    # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('ctrl')    # Download model and configuration from S3 and cache.
             input_context = 'Legal My neighbor is'  # "Legal" is one of the control codes for ctrl
-            input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0)  # encode input context
+            input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2)  # generate sequences
             print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
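The new `attention_mask` and `pad_token_id` arguments matter mainly for padded batches; the same-shape mask documented above implies batched prompts are supported. A hedged sketch, where the token ids and the pad id of 0 are purely illustrative (GPT-2 has no native pad token):

    import tensorflow as tf
    from transformers import AutoTokenizer, TFAutoModelWithLMHead

    tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
    model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')

    pad_id = 0  # illustrative pad id, not a real GPT-2 special token
    input_ids = tf.constant([[464, 3290, pad_id, pad_id],
                             [464, 3797, 3332, 319]], dtype=tf.int32)  # right-padded batch, ids illustrative
    attention_mask = tf.cast(tf.math.not_equal(input_ids, pad_id), tf.int32)  # 1 = NOT MASKED, 0 = MASKED
    outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask,
                             max_length=20, pad_token_id=pad_id)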
@@ -563,7 +586,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
         if self.get_output_embeddings() is None:
             raise AttributeError(
                 "You tried to generate sequences with a model that does not have a LM Head."
-                "Please use another model class (e.g. `TFOpenAIGPTLMHeadModel`, `TFXLNetLMHeadModel`, `TFGPT2LMHeadModel`, `TFCTRLLMHeadModel`, `TFT5WithLMHeadModel`, `TFTransfoXLLMHeadModel`)"
+                "Please use another model class (e.g. `TFOpenAIGPTLMHeadModel`, `TFXLNetLMHeadModel`, `TFGPT2LMHeadModel`, `TFCTRLLMHeadModel`, `TFT5WithLMHeadModel`, `TFTransfoXLLMHeadModel`, `TFXLMWithLMHeadModel`)"
             )

         max_length = max_length if max_length is not None else self.config.max_length
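The guard above means a bare encoder without an LM head cannot call `generate`. A hedged illustration of the failure mode, assuming `TFBertModel.get_output_embeddings()` returns `None` for the base model:

    from transformers import TFBertModel

    model = TFBertModel.from_pretrained('bert-base-uncased')
    try:
        model.generate(max_length=10)
    except AttributeError as err:
        print(err)  # message suggests an LM-head class such as TFGPT2LMHeadModel instead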
@@ -672,8 +672,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         attention_mask=None,
         decoder_start_token_id=None,
     ):
-        r""" Generates sequences for models with a LM head. The method currently supports greedy or penalized greedy decoding, sampling with top-k or nucleus sampling
-        and beam-search.
+        r""" Generates sequences for models with a LM head. The method currently supports greedy decoding, beam-search decoding, sampling with temperature, sampling with top-k or nucleus sampling.

         Adapted in part from `Facebook's XLM beam search code`_.
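The decoding strategy listed in the rewritten summary line is selected implicitly from the arguments rather than by an explicit mode switch. A rough sketch of that mapping, using only arguments documented in this docstring:

    from transformers import AutoTokenizer, AutoModelWithLMHead

    tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
    model = AutoModelWithLMHead.from_pretrained('distilgpt2')
    input_ids = tokenizer.encode('The dog', return_tensors='pt')

    greedy  = model.generate(input_ids=input_ids)                                   # greedy decoding (do_sample defaults to False)
    beam    = model.generate(input_ids=input_ids, num_beams=5)                      # beam-search decoding
    sampled = model.generate(input_ids=input_ids, do_sample=True, temperature=0.7)  # sampling with temperature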
@@ -688,10 +687,16 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
             it as an empty `torch.LongTensor` of shape `(1,)`.

         max_length: (`optional`) int
-            The max length of the sequence to be generated. Between 1 and infinity. Default to 20.
+            The max length of the sequence to be generated. Between `min_length` and infinity. Default to 20.

+        min_length: (`optional`) int
+            The min length of the sequence to be generated. Between 0 and infinity. Default to 0.
+
         do_sample: (`optional`) bool
-            If set to `False` greedy decoding is used. Otherwise sampling is used. Defaults to `False`.
+            If set to `False` greedy decoding is used. Otherwise sampling is used. Defaults to `False` as defined in `configuration_utils.PretrainedConfig`.
+
+        early_stopping: (`optional`) bool
+            if set to `True` beam search is stopped when at least `num_beams` sentences finished per batch. Defaults to `False` as defined in `configuration_utils.PretrainedConfig`.

         num_beams: (`optional`) int
             Number of beams for beam search. Must be between 1 and infinity. 1 means no beam search. Default to 1.
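The two new arguments interact with beam search: `min_length` effectively blocks the end-of-sequence token until the floor length is reached, while `early_stopping` lets the search terminate once `num_beams` finished hypotheses exist per batch. A hedged sketch under those assumptions:

    from transformers import AutoTokenizer, AutoModelWithLMHead

    tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
    model = AutoModelWithLMHead.from_pretrained('distilgpt2')
    input_ids = tokenizer.encode('The dog', return_tensors='pt')

    # at least 5 tokens; beam search may stop early once 5 hypotheses are finished
    outputs = model.generate(input_ids=input_ids, max_length=40, min_length=5,
                             num_beams=5, early_stopping=True)
    print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))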
@@ -709,16 +714,35 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
             The parameter for repetition penalty. Between 1.0 and infinity. 1.0 means no penalty. Default to 1.0.

         bos_token_id: (`optional`) int
-            Beginning of sentence token if no prompt is provided. Default to 0.
+            BOS token. Defaults to bos_token_id as defined in the models config.

+        pad_token_id: (`optional`) int
+            Pad token. Defaults to pad_token_id as defined in the models config.
+
         eos_token_ids: (`optional`) int or list of int
-            End of sequence token or list of tokens to stop the generation. Default to 0.
+            End of sequence token or list of tokens to stop the generation. Default to eos_token_ids as defined in the models config.

         length_penalty: (`optional`) float
             Exponential penalty to the length. Default to 1.

+        no_repeat_ngram_size: (`optional`) int
+            If set to int > 0, all ngrams of size `no_repeat_ngram_size` can only occur once.
+
         num_return_sequences: (`optional`) int
             The number of independently computed returned sequences for each element in the batch. Default to 1.

+        attention_mask (`optional`) obj: `torch.LongTensor` of same shape as `input_ids`
+            Mask to avoid performing attention on padding token indices.
+            Mask values selected in ``[0, 1]``:
+            ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
+            Defaults to `None`.
+
+            `What are attention masks? <../glossary.html#attention-mask>`__
+
+        decoder_start_token_id=None: (`optional`) int
+            If an encoder-decoder model starts decoding with a different token than BOS.
+            Defaults to `None` and is changed to `BOS` later.

         Return:

         output: `torch.LongTensor` of shape `(batch_size * num_return_sequences, sequence_length)`
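`no_repeat_ngram_size` is the new anti-repetition control alongside the existing `repetition_penalty`; banning repeated trigrams is a common choice. A minimal sketch using only arguments documented above:

    from transformers import AutoTokenizer, AutoModelWithLMHead

    tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
    model = AutoModelWithLMHead.from_pretrained('distilgpt2')
    input_ids = tokenizer.encode('The dog', return_tensors='pt')

    # greedy decoding often loops; with this setting no 3-gram may appear twice
    outputs = model.generate(input_ids=input_ids, max_length=50, no_repeat_ngram_size=3)
    print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))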
@@ -728,13 +752,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):

         tokenizer = AutoTokenizer.from_pretrained('distilgpt2')   # Initialize tokenizer
         model = AutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from S3 and cache.
-        outputs = model.generate(max_length=40, do_sample=False)  # do greedy decoding
+        outputs = model.generate(max_length=40)  # do greedy decoding
         print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))

         tokenizer = AutoTokenizer.from_pretrained('openai-gpt')   # Initialize tokenizer
         model = AutoModelWithLMHead.from_pretrained('openai-gpt')    # Download model and configuration from S3 and cache.
         input_context = 'The dog'
-        input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0)  # encode input context
+        input_ids = tokenizer.encode(input_context, return_tensors='pt')  # encode input context
         outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5)  # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog'
         for i in range(3): #  3 output sequences were generated
             print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
@@ -742,7 +766,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         tokenizer = AutoTokenizer.from_pretrained('distilgpt2')   # Initialize tokenizer
         model = AutoModelWithLMHead.from_pretrained('distilgpt2')    # Download model and configuration from S3 and cache.
         input_context = 'The dog'
-        input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0)  # encode input context
+        input_ids = tokenizer.encode(input_context, return_tensors='pt')  # encode input context
         outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3)  # 3 generate sequences using by sampling
         for i in range(3): #  3 output sequences were generated
             print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
@@ -750,7 +774,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         tokenizer = AutoTokenizer.from_pretrained('ctrl')   # Initialize tokenizer
         model = AutoModelWithLMHead.from_pretrained('ctrl')    # Download model and configuration from S3 and cache.
         input_context = 'Legal My neighbor is'  # "Legal" is one of the control codes for ctrl
-        input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0)  # encode input context
+        input_ids = tokenizer.encode(input_context, return_tensors='pt')  # encode input context
         outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2)  # generate sequences
         print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
@@ -760,7 +784,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         if self.get_output_embeddings() is None:
             raise AttributeError(
                 "You tried to generate sequences with a model that does not have a LM Head."
-                "Please use another model class (e.g. `OpenAIGPTLMHeadModel`, `XLNetLMHeadModel`, `GPT2LMHeadModel`, `CTRLLMHeadModel`, `T5WithLMHeadModel`, `TransfoXLLMHeadModel`)"
+                "Please use another model class (e.g. `OpenAIGPTLMHeadModel`, `XLNetLMHeadModel`, `GPT2LMHeadModel`, `CTRLLMHeadModel`, `T5WithLMHeadModel`, `TransfoXLLMHeadModel`, `XLMWithLMHeadModel`, `BartForConditionalGeneration` )"
             )

         max_length = max_length if max_length is not None else self.config.max_length