Make Barthez tokenizer tests a bit faster (#10399)

* Make Barthez tokenizer tests a bit faster

* Quality
This commit is contained in:
Sylvain Gugger 2021-02-25 11:42:25 -05:00 committed by GitHub
parent b040e6efc1
commit 26f8b2cb10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 6 additions and 8 deletions

View File

@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def setUp(self):
super().setUp()
tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
tokenizer.save_pretrained(self.tmpdirname)
tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
self.tokenizer = tokenizer
@require_torch

View File

@@ -238,7 +238,7 @@ class TokenizerTesterMixin:
tokenizer = self.get_rust_tokenizer()
for parameter_name, parameter in signature.parameters.items():
if parameter.default != inspect.Parameter.empty:
if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
self.assertIn(parameter_name, tokenizer.init_kwargs)
def test_rust_and_python_full_tokenizers(self):

View File

@@ -12,18 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
from transformers.file_utils import is_sentencepiece_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
from .test_tokenization_common import TokenizerTesterMixin
if is_sentencepiece_available():
from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
if is_torch_available():

View File

@@ -17,14 +17,12 @@ import tempfile
import unittest
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
from transformers.file_utils import is_sentencepiece_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
from .test_tokenization_common import TokenizerTesterMixin
if is_sentencepiece_available():
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
if is_torch_available():