diff --git a/docs/source/en/preprocessing.mdx b/docs/source/en/preprocessing.mdx index 947129b34aa..6e2e9bd7521 100644 --- a/docs/source/en/preprocessing.mdx +++ b/docs/source/en/preprocessing.mdx @@ -158,14 +158,17 @@ Set the `return_tensors` parameter to either `pt` for PyTorch, or `tf` for Tenso ... "Don't think he knows about second breakfast, Pip.", ... "What about elevensies?", ... ] ->>> encoded_input = tokenizer(batch, padding=True, truncation=True, return_tensors="pt") +>>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="pt") >>> print(encoded_input) -{'input_ids': tensor([[ 101, 153, 7719, 21490, 1122, 1114, 9582, 1623, 102], - [ 101, 5226, 1122, 9649, 1199, 2610, 1236, 102, 0]]), - 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0]]), - 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 0]])} +{'input_ids': tensor([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0], + [101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102], + [101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]]), + 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), + 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])} ``` @@ -178,15 +181,18 @@ Set the `return_tensors` parameter to either `pt` for PyTorch, or `tf` for Tenso >>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf") >>> print(encoded_input) {'input_ids': , 'token_type_ids': , +array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>, 'attention_mask': } +array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>} ```