mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 05:10:06 +06:00
docs: add docs for map, and add num procs to load_dataset (#27520)
This commit is contained in:
parent
85fde09c97
commit
69c9b89fcb
@ -439,6 +439,7 @@ def main():
|
|||||||
data_args.dataset_config_name,
|
data_args.dataset_config_name,
|
||||||
split=data_args.train_split_name,
|
split=data_args.train_split_name,
|
||||||
cache_dir=data_args.dataset_cache_dir,
|
cache_dir=data_args.dataset_cache_dir,
|
||||||
|
num_proc=data_args.preprocessing_num_workers,
|
||||||
token=True if model_args.use_auth_token else None,
|
token=True if model_args.use_auth_token else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -448,6 +449,7 @@ def main():
|
|||||||
data_args.dataset_config_name,
|
data_args.dataset_config_name,
|
||||||
split=data_args.eval_split_name,
|
split=data_args.eval_split_name,
|
||||||
cache_dir=data_args.dataset_cache_dir,
|
cache_dir=data_args.dataset_cache_dir,
|
||||||
|
num_proc=data_args.preprocessing_num_workers,
|
||||||
token=True if model_args.use_auth_token else None,
|
token=True if model_args.use_auth_token else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -551,7 +553,7 @@ def main():
|
|||||||
prepare_dataset,
|
prepare_dataset,
|
||||||
remove_columns=next(iter(raw_datasets.values())).column_names,
|
remove_columns=next(iter(raw_datasets.values())).column_names,
|
||||||
num_proc=num_workers,
|
num_proc=num_workers,
|
||||||
desc="preprocess train dataset",
|
desc="preprocess train and eval dataset",
|
||||||
)
|
)
|
||||||
|
|
||||||
# filter training data with inputs longer than max_input_length
|
# filter training data with inputs longer than max_input_length
|
||||||
|
Loading…
Reference in New Issue
Block a user