mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
remove duplicate words in msg (#31876)
This commit is contained in:
parent
97aa3e2905
commit
e9eeedaf3b
@@ -290,7 +290,7 @@ class FlaxDataCollatorForBartDenoisingLM:
|
||||
def __post_init__(self):
|
||||
if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
|
||||
raise ValueError(
|
||||
- "This tokenizer does not have a mask token or eos token token which is necessary for denoising"
|
||||
+ "This tokenizer does not have a mask token or eos token which is necessary for denoising"
|
||||
" language modeling. "
|
||||
)
|
||||
|
||||
|
@@ -132,7 +132,7 @@ class PreprocessingArguments:
|
||||
default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
|
||||
)
|
||||
output_dir: Optional[str] = field(
|
||||
- default="codeparrot-clean", metadata={"help": "Folder to save processed processed dataset."}
|
||||
+ default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
|
||||
)
|
||||
samples_per_file: Optional[int] = field(
|
||||
default=100_000, metadata={"help": "Number of files to save per JSON output file."}
|
||||
|
@@ -176,7 +176,7 @@ def _download(url: str, root: str) -> Any:
|
||||
model_bytes = open(download_target, "rb").read()
|
||||
if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
|
||||
raise RuntimeError(
|
||||
- "Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
|
||||
+ "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
|
||||
)
|
||||
|
||||
return torch.load(io.BytesIO(model_bytes))
|
||||
|
Loading…
Reference in New Issue
Block a user