remove duplicate words in msg (#31876)

This commit is contained in:
yukionfire 2024-07-10 16:54:45 +08:00 committed by GitHub
parent 97aa3e2905
commit e9eeedaf3b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 3 additions and 3 deletions

View File

@ -290,7 +290,7 @@ class FlaxDataCollatorForBartDenoisingLM:
def __post_init__(self):
if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
raise ValueError(
"This tokenizer does not have a mask token or eos token token which is necessary for denoising"
"This tokenizer does not have a mask token or eos token which is necessary for denoising"
" language modeling. "
)

View File

@ -132,7 +132,7 @@ class PreprocessingArguments:
default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
)
output_dir: Optional[str] = field(
default="codeparrot-clean", metadata={"help": "Folder to save processed processed dataset."}
default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
)
samples_per_file: Optional[int] = field(
default=100_000, metadata={"help": "Number of files to save per JSON output file."}

View File

@ -176,7 +176,7 @@ def _download(url: str, root: str) -> Any:
model_bytes = open(download_target, "rb").read()
if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
raise RuntimeError(
"Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
"Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
)
return torch.load(io.BytesIO(model_bytes))