mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-28 16:52:24 +06:00
Add sentencepiece to the CI and fix tests (#8672)
* Fix the CI and tests
* Fix quality
* Remove that m from nowhere
This commit is contained in:
parent
0ad45e108d
commit
6494910f27
@ -77,7 +77,7 @@ jobs:
|
|||||||
- v0.4-torch_and_tf-{{ checksum "setup.py" }}
|
- v0.4-torch_and_tf-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[sklearn,tf-cpu,torch,testing]
|
- run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-{{ checksum "setup.py" }}
|
key: v0.4-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@ -103,7 +103,7 @@ jobs:
|
|||||||
- v0.4-torch-{{ checksum "setup.py" }}
|
- v0.4-torch-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[sklearn,torch,testing]
|
- run: pip install .[sklearn,torch,testing,sentencepiece]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-torch-{{ checksum "setup.py" }}
|
key: v0.4-torch-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@ -129,7 +129,7 @@ jobs:
|
|||||||
- v0.4-tf-{{ checksum "setup.py" }}
|
- v0.4-tf-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[sklearn,tf-cpu,testing]
|
- run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-tf-{{ checksum "setup.py" }}
|
key: v0.4-tf-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@ -155,7 +155,7 @@ jobs:
|
|||||||
- v0.4-flax-{{ checksum "setup.py" }}
|
- v0.4-flax-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: sudo pip install .[flax,sklearn,torch,testing]
|
- run: sudo pip install .[flax,sklearn,torch,testing,sentencepiece]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-flax-{{ checksum "setup.py" }}
|
key: v0.4-flax-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@ -181,7 +181,7 @@ jobs:
|
|||||||
- v0.4-torch-{{ checksum "setup.py" }}
|
- v0.4-torch-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[sklearn,torch,testing]
|
- run: pip install .[sklearn,torch,testing,sentencepiece]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-torch-{{ checksum "setup.py" }}
|
key: v0.4-torch-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@ -207,7 +207,7 @@ jobs:
|
|||||||
- v0.4-tf-{{ checksum "setup.py" }}
|
- v0.4-tf-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[sklearn,tf-cpu,testing]
|
- run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-tf-{{ checksum "setup.py" }}
|
key: v0.4-tf-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@ -231,7 +231,7 @@ jobs:
|
|||||||
- v0.4-custom_tokenizers-{{ checksum "setup.py" }}
|
- v0.4-custom_tokenizers-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[ja,testing]
|
- run: pip install .[ja,testing,sentencepiece]
|
||||||
- run: python -m unidic download
|
- run: python -m unidic download
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
|
key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
|
||||||
@ -258,7 +258,7 @@ jobs:
|
|||||||
- v0.4-torch_examples-{{ checksum "setup.py" }}
|
- v0.4-torch_examples-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install .[sklearn,torch,testing]
|
- run: pip install .[sklearn,torch,sentencepiece,testing]
|
||||||
- run: pip install -r examples/requirements.txt
|
- run: pip install -r examples/requirements.txt
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-torch_examples-{{ checksum "setup.py" }}
|
key: v0.4-torch_examples-{{ checksum "setup.py" }}
|
||||||
@ -324,7 +324,7 @@ jobs:
|
|||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install isort
|
- run: pip install isort
|
||||||
- run: pip install .[tf,torch,flax,quality]
|
- run: pip install .[all,quality]
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-code_quality-{{ checksum "setup.py" }}
|
key: v0.4-code_quality-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
|
@ -188,7 +188,7 @@ class MBartTokenizer(XLMRobertaTokenizer):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = self.max_len
|
max_length = self.model_max_length
|
||||||
self.set_src_lang_special_tokens(src_lang)
|
self.set_src_lang_special_tokens(src_lang)
|
||||||
model_inputs: BatchEncoding = self(
|
model_inputs: BatchEncoding = self(
|
||||||
src_texts,
|
src_texts,
|
||||||
|
@ -185,7 +185,7 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = self.max_len
|
max_length = self.model_max_length
|
||||||
self.set_src_lang_special_tokens(src_lang)
|
self.set_src_lang_special_tokens(src_lang)
|
||||||
model_inputs: BatchEncoding = self(
|
model_inputs: BatchEncoding = self(
|
||||||
src_texts,
|
src_texts,
|
||||||
|
@ -309,7 +309,7 @@ class T5Tokenizer(PreTrainedTokenizer):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = self.max_len
|
max_length = self.model_max_length
|
||||||
model_inputs = self(
|
model_inputs = self(
|
||||||
src_texts,
|
src_texts,
|
||||||
add_special_tokens=True,
|
add_special_tokens=True,
|
||||||
|
@ -226,7 +226,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = self.max_len
|
max_length = self.model_max_length
|
||||||
self.prefix_tokens = []
|
self.prefix_tokens = []
|
||||||
model_inputs = self(
|
model_inputs = self(
|
||||||
src_texts,
|
src_texts,
|
||||||
|
@ -1,14 +1,7 @@
|
|||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import (
|
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
|
||||||
SPIECE_UNDERLINE,
|
|
||||||
AutoTokenizer,
|
|
||||||
BatchEncoding,
|
|
||||||
MBartTokenizer,
|
|
||||||
MBartTokenizerFast,
|
|
||||||
is_torch_available,
|
|
||||||
)
|
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_sentencepiece_available,
|
_sentencepiece_available,
|
||||||
require_sentencepiece,
|
require_sentencepiece,
|
||||||
@ -138,7 +131,7 @@ class MBartEnroIntegrationTest(unittest.TestCase):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.tokenizer: MBartTokenizer = AutoTokenizer.from_pretrained(cls.checkpoint_name)
|
cls.tokenizer: MBartTokenizer = MBartTokenizer.from_pretrained(cls.checkpoint_name)
|
||||||
cls.pad_token_id = 1
|
cls.pad_token_id = 1
|
||||||
return cls
|
return cls
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user