diff --git a/ISSUES.md b/ISSUES.md
index a5969a3027f..3b4e587a6d1 100644
--- a/ISSUES.md
+++ b/ISSUES.md
@@ -263,9 +263,9 @@ You are not required to read the following guidelines before opening an issue. H
 But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying it. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like:

 ```
- > How big is your gpu cluster?
+ > How big is your GPU cluster?

- Our cluster is made of 256 gpus.
+ Our cluster is made of 256 GPUs.
 ```

 If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment.
diff --git a/examples/legacy/seq2seq/README.md b/examples/legacy/seq2seq/README.md
index fb826129a8e..741d8b5dd54 100644
--- a/examples/legacy/seq2seq/README.md
+++ b/examples/legacy/seq2seq/README.md
@@ -209,7 +209,7 @@ th 56 \
 ```

 ### Multi-GPU Evaluation
-here is a command to run xsum evaluation on 8 GPUS. It is more than linearly faster than run_eval.py in some cases
+here is a command to run xsum evaluation on 8 GPUs. It is more than linearly faster than run_eval.py in some cases
 because it uses SortishSampler to minimize padding. You can also use it on 1 GPU. `data_dir` must have
 `{type_path}.source` and `{type_path}.target`. Run `./run_distributed_eval.py --help` for all clargs.

diff --git a/tests/quantization/eetq_integration/test_eetq.py b/tests/quantization/eetq_integration/test_eetq.py
index f14fa076e4b..b1453f85eae 100644
--- a/tests/quantization/eetq_integration/test_eetq.py
+++ b/tests/quantization/eetq_integration/test_eetq.py
@@ -158,7 +158,7 @@ class EetqTest(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
         quantization_config = EetqConfig()
diff --git a/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py b/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py
index a9ff650c039..3efff115ba7 100644
--- a/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py
+++ b/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py
@@ -215,7 +215,7 @@ class FbgemmFp8Test(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
         quantization_config = FbgemmFp8Config()
diff --git a/tests/quantization/finegrained_fp8/test_fp8.py b/tests/quantization/finegrained_fp8/test_fp8.py
index f572567ed18..e59c2068cde 100644
--- a/tests/quantization/finegrained_fp8/test_fp8.py
+++ b/tests/quantization/finegrained_fp8/test_fp8.py
@@ -193,7 +193,7 @@ class FP8QuantizerTest(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(self.device_map)
         quantization_config = FineGrainedFP8Config()
diff --git a/tests/quantization/higgs/test_higgs.py b/tests/quantization/higgs/test_higgs.py
index ece8af7c684..687a4ab22f2 100644
--- a/tests/quantization/higgs/test_higgs.py
+++ b/tests/quantization/higgs/test_higgs.py
@@ -156,7 +156,7 @@ class HiggsTest(unittest.TestCase):
     def test_quantized_model_multi_gpu(self):
         """
         Simple test that checks if the quantized model is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """
         input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
         quantization_config = HiggsConfig()
diff --git a/tests/quantization/torchao_integration/test_torchao.py b/tests/quantization/torchao_integration/test_torchao.py
index 037bf1506f1..55d57d986c2 100644
--- a/tests/quantization/torchao_integration/test_torchao.py
+++ b/tests/quantization/torchao_integration/test_torchao.py
@@ -255,7 +255,7 @@ class TorchAoGPUTest(TorchAoTest):
     def test_int4wo_quant_multi_gpu(self):
         """
         Simple test that checks if the quantized model int4 weight only is working properly with multiple GPUs
-        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUS
+        set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs
         """

         quant_config = TorchAoConfig("int4_weight_only", **self.quant_scheme_kwargs)
diff --git a/tests/sagemaker/README.md b/tests/sagemaker/README.md
index cfbcf390b99..70dc301f9c3 100644
--- a/tests/sagemaker/README.md
+++ b/tests/sagemaker/README.md
@@ -138,7 +138,7 @@ images:

 ## Current Tests

-| ID | Description | Platform | #GPUS | Collected & evaluated metrics |
+| ID | Description | Platform | #GPUs | Collected & evaluated metrics |
 |-------------------------------------|-------------------------------------------------------------------|-----------------------------|-------|------------------------------------------|
 | pytorch-transfromers-test-single | test bert finetuning using BERT fromtransformerlib+PT | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss |
 | pytorch-transfromers-test-2-ddp | test bert finetuning using BERT from transformer lib+ PT DPP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss |