mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
Make Barthez tokenizer tests a bit faster (#10399)
* Make Barthez tokenizer tests a bit faster
* Quality
This commit is contained in:
parent
b040e6efc1
commit
26f8b2cb10
@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
|
||||
tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
|
||||
tokenizer.save_pretrained(self.tmpdirname)
|
||||
tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
|
||||
self.tokenizer = tokenizer
|
||||
|
||||
@require_torch
|
||||
|
@@ -238,7 +238,7 @@ class TokenizerTesterMixin:
|
||||
tokenizer = self.get_rust_tokenizer()
|
||||
|
||||
for parameter_name, parameter in signature.parameters.items():
|
||||
if parameter.default != inspect.Parameter.empty:
|
||||
if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
|
||||
self.assertIn(parameter_name, tokenizer.init_kwargs)
|
||||
|
||||
def test_rust_and_python_full_tokenizers(self):
|
||||
|
@@ -12,18 +12,17 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
|
||||
from transformers.file_utils import is_sentencepiece_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
|
||||
|
||||
from .test_tokenization_common import TokenizerTesterMixin
|
||||
|
||||
|
||||
if is_sentencepiece_available():
|
||||
from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
|
||||
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
|
@@ -17,14 +17,12 @@ import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
|
||||
from transformers.file_utils import is_sentencepiece_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
|
||||
|
||||
from .test_tokenization_common import TokenizerTesterMixin
|
||||
|
||||
|
||||
if is_sentencepiece_available():
|
||||
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
||||
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
|
Loading…
Reference in New Issue
Block a user