Use argument for preprocessing workers in run_summarization (#15394)

This commit is contained in:
Sylvain Gugger 2022-01-28 18:34:10 -05:00 committed by GitHub
parent db07956740
commit c98a6ac211
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -443,6 +443,7 @@ def main():
processed_datasets = raw_datasets.map(
preprocess_function,
batched=True,
num_proc=args.preprocessing_num_workers,
remove_columns=column_names,
load_from_cache_file=not args.overwrite_cache,
desc="Running tokenizer on dataset",