quick fix on concatenating text to support more datasets (#8474)

zeyuyun1 2020-11-12 06:47:08 -08:00 committed by GitHub
parent 17b1fd804f
commit 924c624a46
3 changed files with 3 additions and 3 deletions
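
The fix is the same one-line change in each of the three language-modeling example scripts: the tokenization `map` call now drops every original dataset column instead of only the text column. A minimal sketch of the pattern, assuming the Hugging Face `datasets` and `transformers` libraries; the dataset and tokenizer names are illustrative, not the scripts' exact code:

# Minimal sketch (not the repository's exact script); dataset and model names are illustrative.
from datasets import load_dataset
from transformers import AutoTokenizer

raw_datasets = load_dataset("wikitext", "wikitext-2-raw-v1")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

column_names = raw_datasets["train"].column_names  # e.g. ["text"], or ["title", "text", ...]
text_column_name = "text" if "text" in column_names else column_names[0]

def tokenize_function(examples):
    # Returns only tokenizer outputs (input_ids, attention_mask, ...).
    return tokenizer(examples[text_column_name])

tokenized_datasets = raw_datasets.map(
    tokenize_function,
    batched=True,
    # Drop every original column, not just the text column, so datasets with extra
    # fields (e.g. a "title" column) do not carry raw columns into the later
    # text-concatenation step, which changes the number of rows.
    remove_columns=column_names,
)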


@@ -254,7 +254,7 @@ def main():
     tokenize_function,
     batched=True,
     num_proc=data_args.preprocessing_num_workers,
-    remove_columns=[text_column_name],
+    remove_columns=column_names,
     load_from_cache_file=not data_args.overwrite_cache,
 )


@@ -292,7 +292,7 @@ def main():
     tokenize_function,
     batched=True,
     num_proc=data_args.preprocessing_num_workers,
-    remove_columns=[text_column_name],
+    remove_columns=column_names,
     load_from_cache_file=not data_args.overwrite_cache,
 )


@@ -279,7 +279,7 @@ def main():
     tokenize_function,
     batched=True,
     num_proc=data_args.preprocessing_num_workers,
-    remove_columns=[text_column_name],
+    remove_columns=column_names,
     load_from_cache_file=not data_args.overwrite_cache,
 )
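
Why all columns need to go: after tokenization these scripts concatenate the token sequences and re-chunk them into fixed-size blocks, which changes the number of rows and only works on the tokenizer's list-valued outputs. Roughly, paraphrasing the examples' grouping step (the `block_size` default here is assumed for illustration):

# Paraphrase of the grouping step applied after tokenization; block_size is assumed.
def group_texts(examples, block_size=128):
    # Concatenate every remaining column; this assumes each column is a list of
    # token-id lists. A leftover raw string column (e.g. "text" or "title") would
    # make sum(..., []) raise a TypeError, and its old row count would no longer
    # match the new number of chunked rows.
    concatenated = {k: sum(examples[k], []) for k in examples.keys()}
    total_length = (len(concatenated[list(examples.keys())[0]]) // block_size) * block_size
    return {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated.items()
    }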