From 8fb60bf6be70163a5de9d17d167506276fddbebc Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Tue, 13 May 2025 18:50:12 +0800 Subject: [PATCH] add timeout for downloading the `librispeech_asr` dataset (#38073) * add timeout * change 10 to 60 --- .../speech-pretraining/run_wav2vec2_pretraining_no_trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index b4ce3f71eb5..3bb7fe7bdc0 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -21,6 +21,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Optional, Union +import aiohttp import datasets import torch from accelerate import Accelerator @@ -454,6 +455,7 @@ def main(): split=train_split_name, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code, + storage_options={"client_kwargs": {"timeout": aiohttp.ClientTimeout(total=60 * 60)}}, ) datasets_splits.append(dataset_split)