mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Add check to verify existence of pad_token_id
In batch_encode_plus we have to ensure that the tokenizer has a pad_token_id so that no None values are added as padding. Without this check, that would happen with tokenizers such as gpt2, openai, and transfoxl. Closes https://github.com/huggingface/transformers/issues/2640
This commit is contained in:
parent
e63a81dd25
commit
9fde13a3ac
@@ -998,7 +998,8 @@ class PreTrainedTokenizer(object):
|
||||
for key, value in batch_outputs.items():
|
||||
|
||||
padded_value = value
|
||||
if key != "input_len":
|
||||
# verify that the tokenizer has a pad_token_id
|
||||
if key != "input_len" and self.pad_token_id is not None:
|
||||
# Padding handle
|
||||
padded_value = [
|
||||
v + [self.pad_token_id if key == "input_ids" else 1] * (max_seq_len - len(v))
|
||||
|
Loading…
Reference in New Issue
Block a user