From 1dc43e56c9116dc1a200ea13512bd7788c448837 Mon Sep 17 00:00:00 2001
From: LysandreJik
Date: Wed, 28 Aug 2019 09:37:27 -0400
Subject: [PATCH] Documentation additions

---
 docs/source/index.rst                       |  1 +
 docs/source/model_doc/distilbert.rst        | 43 +++++++++++++++++++++
 docs/source/pretrained_models.rst           |  8 ++++
 pytorch_transformers/modeling_distilbert.py |  8 ++--
 4 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 docs/source/model_doc/distilbert.rst

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 37b3509fe40..1ae722e1e87 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -48,3 +48,4 @@ The library currently contains PyTorch implementations, pre-trained model weight
     model_doc/xlm
     model_doc/xlnet
     model_doc/roberta
+    model_doc/distilbert
diff --git a/docs/source/model_doc/distilbert.rst b/docs/source/model_doc/distilbert.rst
new file mode 100644
index 00000000000..cc156c90c21
--- /dev/null
+++ b/docs/source/model_doc/distilbert.rst
@@ -0,0 +1,43 @@
+DistilBERT
+----------------------------------------------------
+
+``DistilBertConfig``
+~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_transformers.DistilBertConfig
+    :members:
+
+
+``DistilBertTokenizer``
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_transformers.DistilBertTokenizer
+    :members:
+
+
+``DistilBertModel``
+~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_transformers.DistilBertModel
+    :members:
+
+
+``DistilBertForMaskedLM``
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_transformers.DistilBertForMaskedLM
+    :members:
+
+
+``DistilBertForSequenceClassification``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_transformers.DistilBertForSequenceClassification
+    :members:
+
+
+``DistilBertForQuestionAnswering``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_transformers.DistilBertForQuestionAnswering
+    :members:
diff --git a/docs/source/pretrained_models.rst b/docs/source/pretrained_models.rst
index 7df70ea2257..2bbb7ae7a19 100644
--- a/docs/source/pretrained_models.rst
+++ b/docs/source/pretrained_models.rst
@@ -111,5 +111,13 @@ Here is the full list of the currently provided pretrained models together with
 |                   |                                                            | | ``roberta-large`` fine-tuned on `MNLI `__.                                                                                              |
 |                   |                                                            | (see `details `__)                                                                                                                        |
 +-------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
+| DistilBERT        | ``distilbert-base-uncased``                                | | 6-layer, 768-hidden, 12-heads, 66M parameters                                                                                           |
+|                   |                                                            | | The DistilBERT model distilled from the BERT model `bert-base-uncased` checkpoint                                                       |
+|                   |                                                            | (see `details `__)                                                                                                                        |
+|                   +------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
+|                   | ``distilbert-base-uncased-distilled-squad``                | | 6-layer, 768-hidden, 12-heads, 66M parameters                                                                                           |
+|                   |                                                            | | The DistilBERT model distilled from the BERT model `bert-base-uncased` checkpoint, with an additional linear layer.                     |
+|                   |                                                            | (see `details `__)                                                                                                                        |
++-------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
 
 .. `__
\ No newline at end of file
diff --git a/pytorch_transformers/modeling_distilbert.py b/pytorch_transformers/modeling_distilbert.py
index af77757293c..6ae18bdb015 100644
--- a/pytorch_transformers/modeling_distilbert.py
+++ b/pytorch_transformers/modeling_distilbert.py
@@ -433,7 +433,7 @@ DISTILBERT_START_DOCSTRING = r"""
 
     Here are the differences between the interface of Bert and DistilBert:
 
-    - DistilBert doesn't have `token_type_ids`, you don't need to indicate which token belong to which segment. Just separate your segments with the separation token `tokenizer.sep_token` (or `[SEP]`)
+    - DistilBert doesn't have `token_type_ids`, you don't need to indicate which token belongs to which segment. Just separate your segments with the separation token `tokenizer.sep_token` (or `[SEP]`)
     - DistilBert doesn't have options to select the input positions (`position_ids` input). This could be added if necessary though, just let's us know if you need this option.
 
     For more information on DistilBERT, please refer to our
@@ -450,9 +450,9 @@ DISTILBERT_INPUTS_DOCSTRING = r"""
     Inputs:
-        **input_ids**L ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
-            Indices oof input sequence tokens in the vocabulary.
-            The input sequences should start with `[CLS]` and `[SEP]` tokens.
+        **input_ids** ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
+            Indices of input sequence tokens in the vocabulary.
+            The input sequences should start with `[CLS]` and end with `[SEP]` tokens.
             For now, ONLY BertTokenizer(`bert-base-uncased`) is supported and you should use this tokenizer when using DistilBERT.
         **attention_mask**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
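
For reference, a minimal usage sketch of the interface documented above. It is not taken from the patch or from the repository's examples; it assumes the ``distilbert-base-uncased`` checkpoint listed in ``pretrained_models.rst`` can be downloaded with ``from_pretrained``, follows the docstring's guidance to build inputs with ``BertTokenizer`` and ``bert-base-uncased``, and assumes the usual ``pytorch_transformers`` convention that the hidden states come first in the returned tuple::

    import torch
    from pytorch_transformers import BertTokenizer, DistilBertModel

    # The docstring notes that, for now, only BertTokenizer('bert-base-uncased')
    # is supported when using DistilBERT.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')
    model.eval()

    # DistilBERT has no token_type_ids: the sequence starts with [CLS], ends with [SEP],
    # and segments are separated with the [SEP] token only.
    tokens = ['[CLS]'] + tokenizer.tokenize("Hello, my dog is cute") + ['[SEP]']
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])

    with torch.no_grad():
        outputs = model(input_ids)      # no token_type_ids argument
    last_hidden_state = outputs[0]      # (batch_size, sequence_length, hidden_size)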