minor doc fixes (#5831)

* minor doc fixes: correct superclass name and small grammar fixes
* correct the instance name in the error message

It appears to be `BaseTokenizer`, judging from `from tokenizers.implementations import BaseTokenizer as BaseTokenizerFast`, and not `Tokenizer` as it currently says.
2025-07-31 02:02:21 +06:00 · 2020-07-22 10:22:34 -07:00 · 2020-07-22 10:22:34 -07:00 · 2c0da7803a
commit 2c0da7803a
parent feeb956a19
1 changed files with 6 additions and 6 deletions
--- a/src/transformers/tokenization_utils_fast.py
+++ b/src/transformers/tokenization_utils_fast.py
@@ -44,12 +44,12 @@ logger = logging.getLogger(__name__)
 class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
    """ Base class for all fast tokenizers (wrapping HuggingFace tokenizers library).

-    Inherit from PreTrainedTokenizer.
+    Inherits from PreTrainedTokenizerBase.

-    Handle all the shared methods for tokenization and special tokens as well as methods
-    downloading/caching/loading pretrained tokenizers as well as adding tokens to the vocabulary.
+    Handles all the shared methods for tokenization and special tokens, as well as methods for
+    downloading/caching/loading pretrained tokenizers, as well as adding tokens to the vocabulary.

-    This class also contain the added tokens in a unified way on top of all tokenizers so we don't
+    This class also contains the added tokens in a unified way on top of all tokenizers so we don't
    have to handle the specific vocabulary augmentation methods of the various underlying
    dictionary structures (BPE, sentencepiece...).

@@ -95,7 +95,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
        - ``mask_token``: (`Optional`) string: a masking token (e.g. when training a model with masked-language
            modeling). Will be associated to ``self.mask_token`` and ``self.mask_token_id``
        - ``additional_special_tokens``: (`Optional`) list: a list of additional special tokens.
-            Adding all special tokens here ensure they won't be split by the tokenization process.
+            Adding all special tokens here ensures they won't be split by the tokenization process.
            Will be associated to ``self.additional_special_tokens`` and ``self.additional_special_tokens_ids``


@@ -105,7 +105,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
    def __init__(self, tokenizer: BaseTokenizerFast, **kwargs):
        if not isinstance(tokenizer, BaseTokenizerFast):
            raise ValueError(
-                "Tokenizer should be an instance of a Tokenizer " "provided by HuggingFace tokenizers library."
+                "Tokenizer should be an instance of a BaseTokenizer " "provided by HuggingFace tokenizers library."
            )
        self._tokenizer: BaseTokenizerFast = tokenizer