Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-04 05:10:06 +06:00)

Change GPUS to GPUs (#36945)

Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

parent 69632aadb7
commit ebd2029483
@@ -263,9 +263,9 @@ You are not required to read the following guidelines before opening an issue. H
 But if you're replying to a comment that happened some comments back, it's always good practice to quote just the relevant lines you're replying to. The `>` is used for quoting, or you can always use the menu to do so. For example, your editor box will look like:
 
 ```
-> How big is your gpu cluster?
+> How big is your GPU cluster?
 
-Our cluster is made of 256 gpus.
+Our cluster is made of 256 GPUs.
 ```
 
 If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment.
@@ -209,7 +209,7 @@ th 56 \
 ```
 
 ### Multi-GPU Evaluation
-here is a command to run xsum evaluation on 8 GPUS. It is more than linearly faster than run_eval.py in some cases
+here is a command to run xsum evaluation on 8 GPUs. It is more than linearly faster than run_eval.py in some cases
 because it uses SortishSampler to minimize padding. You can also use it on 1 GPU. `data_dir` must have
 `{type_path}.source` and `{type_path}.target`. Run `./run_distributed_eval.py --help` for all clargs.
 
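The "more than linearly faster" claim above rests on sortish sampling: batches are drawn from length-sorted chunks, so the sequences in a batch have similar lengths and little compute is wasted on pad tokens. Below is a minimal Python sketch of that idea; the function `sortish_batches` and the dummy lengths are illustrative stand-ins, not the library's actual `SortishSampler` implementation.

```python
import random

def sortish_batches(lengths, batch_size, chunk_mult=3):
    """Group example indices into batches of similar length, with some shuffling."""
    idx = list(range(len(lengths)))
    random.shuffle(idx)                      # keep epoch-to-epoch randomness
    chunk = batch_size * chunk_mult
    batches = []
    for start in range(0, len(idx), chunk):  # sort only within larger chunks
        block = sorted(idx[start:start + chunk], key=lambda i: lengths[i], reverse=True)
        batches.extend(block[j:j + batch_size] for j in range(0, len(block), batch_size))
    return batches

# Example: source lengths of ten dummy documents.
lengths = [12, 512, 40, 300, 7, 256, 64, 128, 480, 33]
for batch in sortish_batches(lengths, batch_size=2):
    print([lengths[i] for i in batch])  # each batch holds similar lengths -> minimal padding
```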
@@ -158,7 +158,7 @@ class EetqTest(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
         quantization_config = EetqConfig()
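This hunk and the next three (FBGEMM FP8, fine-grained FP8, HIGGS) all exercise the same multi-GPU recipe: expose exactly two devices via CUDA_VISIBLE_DEVICES, then let `device_map="auto"` shard the quantized model across them. Here is a minimal sketch of that recipe, using EETQ as the example backend; the checkpoint name is a placeholder and the `eetq` package is assumed to be installed.

```python
import os

# Must be set before torch initializes CUDA; limits the run to two devices.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0,1")

from transformers import AutoModelForCausalLM, AutoTokenizer, EetqConfig

model_id = "facebook/opt-350m"      # placeholder checkpoint, not the one used in the tests
quantization_config = EetqConfig()  # EETQ int8 weight-only quantization

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",              # shard layers across the visible GPUs
    quantization_config=quantization_config,
)

# Sanity check mirroring the tests: the model should span more than one device.
print(set(model.hf_device_map.values()))

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=10)[0]))
```

Swapping `EetqConfig` for `FbgemmFp8Config`, `FineGrainedFP8Config`, or `HiggsConfig` gives the variants exercised by the following hunks.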
@@ -215,7 +215,7 @@ class FbgemmFp8Test(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
         quantization_config = FbgemmFp8Config()
@@ -193,7 +193,7 @@ class FP8QuantizerTest(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(self.device_map)
         quantization_config = FineGrainedFP8Config()
@@ -156,7 +156,7 @@ class HiggsTest(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
         quantization_config = HiggsConfig()
@@ -255,7 +255,7 @@ class TorchAoGPUTest(TorchAoTest):
     def test_int4wo_quant_multi_gpu(self):
         """
         Simple test that checks if the quantized model int4 weight only is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
 
         quant_config = TorchAoConfig("int4_weight_only", **self.quant_scheme_kwargs)
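TorchAo differs from the backends above in that the quantization scheme is selected by name, with scheme-specific kwargs such as the weight grouping size. A minimal sketch of int4 weight-only quantization sharded over two GPUs, assuming the `torchao` package is installed; the checkpoint and `group_size` value are illustrative, not what the test suite uses.

```python
import os

os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0,1")  # expose exactly two devices

from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig

model_id = "facebook/opt-350m"  # placeholder checkpoint
# "int4_weight_only" quantizes weights to 4 bits; group_size trades accuracy for size.
quant_config = TorchAoConfig("int4_weight_only", group_size=128)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",          # shard across both visible GPUs
    quantization_config=quant_config,
)

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=10)[0]))
```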
@@ -138,7 +138,7 @@ images:
 
 ## Current Tests
 
-| ID | Description | Platform | #GPUS | Collected & evaluated metrics |
+| ID | Description | Platform | #GPUs | Collected & evaluated metrics |
 |-------------------------------------|-------------------------------------------------------------------|-----------------------------|-------|------------------------------------------|
 | pytorch-transfromers-test-single | test bert finetuning using BERT from transformer lib+PT | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss |
 | pytorch-transfromers-test-2-ddp | test bert finetuning using BERT from transformer lib+PT DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss |