mirror of https://github.com/huggingface/transformers.git (synced 2025-08-01 02:31:11 +06:00)

remove duplicate words in msg (#31876)

commit e9eeedaf3b (parent 97aa3e2905)
@@ -290,7 +290,7 @@ class FlaxDataCollatorForBartDenoisingLM:
     def __post_init__(self):
         if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
             raise ValueError(
-                "This tokenizer does not have a mask token or eos token token which is necessary for denoising"
+                "This tokenizer does not have a mask token or eos token which is necessary for denoising"
                 " language modeling. "
             )
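
For context, the string being deduplicated lives in a dataclass-based data collator that validates its tokenizer as soon as it is constructed. A minimal sketch of that pattern, assuming a transformers-style tokenizer (the class below is an illustrative stand-in, not the real FlaxDataCollatorForBartDenoisingLM):

# Minimal sketch: a dataclass collator that checks for the special tokens it
# needs in __post_init__, so a misconfigured tokenizer fails at construction
# time rather than in the middle of training.
from dataclasses import dataclass

from transformers import PreTrainedTokenizerBase


@dataclass
class DenoisingCollatorSketch:  # illustrative stand-in for the real collator
    tokenizer: PreTrainedTokenizerBase

    def __post_init__(self):
        # dataclasses call __post_init__ right after the generated __init__.
        if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
            raise ValueError(
                "This tokenizer does not have a mask token or eos token which is necessary for denoising"
                " language modeling. "
            )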
@@ -132,7 +132,7 @@ class PreprocessingArguments:
         default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
     )
     output_dir: Optional[str] = field(
-        default="codeparrot-clean", metadata={"help": "Folder to save processed processed dataset."}
+        default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
     )
     samples_per_file: Optional[int] = field(
         default=100_000, metadata={"help": "Number of files to save per JSON output file."}
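
The help string fixed above sits in a dataclass of preprocessing options; in the transformers examples such dataclasses are typically handed to HfArgumentParser, which turns every field into a command-line flag and surfaces metadata["help"] as its help text. A hedged sketch of that usage (class, field, and script names here are illustrative):

# Illustrative only: how field(default=..., metadata={"help": ...}) becomes a CLI option.
from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser


@dataclass
class ProcessingArgsSketch:  # stand-in for PreprocessingArguments
    dataset_name: Optional[str] = field(
        default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
    )
    output_dir: Optional[str] = field(
        default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
    )


if __name__ == "__main__":
    # e.g. `python preprocess_sketch.py --output_dir my-clean-dataset`
    (args,) = HfArgumentParser(ProcessingArgsSketch).parse_args_into_dataclasses()
    print(args.dataset_name, args.output_dir)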
@@ -176,7 +176,7 @@ def _download(url: str, root: str) -> Any:
     model_bytes = open(download_target, "rb").read()
     if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
         raise RuntimeError(
-            "Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
+            "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
         )

     return torch.load(io.BytesIO(model_bytes))
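
The last hunk touches a checksum guard: the downloaded checkpoint's bytes are hashed with SHA256 and compared to the expected digest before being deserialized. A hedged sketch of the same pattern using the standard hashlib module rather than the library's internal insecure_hashlib helper (the function name and arguments are illustrative, not the actual _download):

# Sketch: verify a local checkpoint's SHA256 digest before handing the bytes
# to torch.load.
import hashlib
import io

import torch


def load_verified_checkpoint(path: str, expected_sha256: str):
    with open(path, "rb") as f:
        model_bytes = f.read()
    if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
        raise RuntimeError(
            "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
        )
    # torch.load accepts any file-like object, so the verified bytes can be
    # deserialized straight from memory.
    return torch.load(io.BytesIO(model_bytes))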