Use argument for preprocessing workers in run_summarization (#15394)

2025-07-31 02:02:21 +06:00 · 2022-01-28 18:34:10 -05:00 · 2022-01-28 18:34:10 -05:00 · c98a6ac211
commit c98a6ac211
parent db07956740
1 changed file with 1 addition and 0 deletions
--- a/examples/pytorch/summarization/run_summarization_no_trainer.py
+++ b/examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -443,6 +443,7 @@ def main():
        processed_datasets = raw_datasets.map(
            preprocess_function,
            batched=True,
+            num_proc=args.preprocessing_num_workers,
            remove_columns=column_names,
            load_from_cache_file=not args.overwrite_cache,
            desc="Running tokenizer on dataset",