Update trainer_pt_utils.py docstrings for consistency (#36912)

* Update trainer_pt_utils.py

* update docstrings in trainer_pt_utils.py for consistency

* Update src/transformers/trainer_pt_utils.py

---------

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
commit a6ecb54159 (parent cbf924b76c)
Author: Ethan Knights
Date: 2025-03-24 14:46:41 +00:00 (committed via GitHub)

src/transformers/trainer_pt_utils.py

@@ -291,7 +291,7 @@ class DistributedSamplerWithLoop(DistributedSampler):
class EvalLoopContainer:
"""
-Container to store intermediate results of evaluation loop
+Container to store intermediate results of evaluation loop.
Args:
do_nested_concat (`bool`, *optional*, defaults to `True`):
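
A quick aside on the `do_nested_concat` option above: the sketch below is a toy illustration (not the transformers implementation, and the helper name is made up) of what concatenating nested per-batch outputs along the batch dimension means.

```python
import torch

# Toy illustration only -- not the transformers implementation. "Nested
# concatenation" means per-batch outputs that may be tuples or dicts of
# tensors are concatenated element-wise along the batch dimension.
def nested_concat_sketch(stored, new):
    if stored is None:
        return new
    if isinstance(stored, (tuple, list)):
        return type(stored)(nested_concat_sketch(s, n) for s, n in zip(stored, new))
    if isinstance(stored, dict):
        return {k: nested_concat_sketch(stored[k], new[k]) for k in stored}
    return torch.cat((stored, new), dim=0)

accumulated = None
for batch_logits in (torch.ones(2, 3), torch.zeros(4, 3)):
    accumulated = nested_concat_sketch(accumulated, batch_logits)
print(accumulated.shape)  # torch.Size([6, 3])
```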
@@ -443,7 +443,7 @@ class DistributedTensorGatherer:
- P1: `[6, 7, 8, 9, 10, 11]`
- P2: `[12, 13, 14, 15, 0, 1]`
-The first batch treated on each process will be
+The first batch treated on each process will be:
- P0: `[0, 1]`
- P1: `[6, 7]`
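
The shard layout in the example above is plain arithmetic; the sketch below reproduces only that arithmetic (it is not the sampler code) for a dataset of length 16 on 3 processes.

```python
# Reproduce the P0/P1/P2 shards from the docstring example: pad by looping
# back to the start of the dataset, then split into contiguous shards.
dataset_len, world_size = 16, 3
per_process = -(-dataset_len // world_size)              # ceil(16 / 3) = 6
total = per_process * world_size                         # 18
indices = list(range(dataset_len))
indices += indices[: total - dataset_len]                # [..., 14, 15, 0, 1]
shards = [indices[i * per_process:(i + 1) * per_process] for i in range(world_size)]
print(shards)                   # [[0, ..., 5], [6, ..., 11], [12, 13, 14, 15, 0, 1]]
print([s[:2] for s in shards])  # first batch of size 2 on each process
```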
@@ -736,7 +736,7 @@ class DistributedLengthGroupedSampler(DistributedSampler):
# add extra samples to make it evenly divisible
indices += indices[: (self.total_size - len(indices))]
else:
-# remove tail of data to make it evenly divisible.
+# remove tail of data to make it evenly divisible
indices = indices[: self.total_size]
assert len(indices) == self.total_size
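
Both branches above amount to a pad-or-truncate on a plain index list; a minimal sketch with made-up sizes:

```python
# Made-up sizes to illustrate the two branches above (not the sampler code).
indices = list(range(10))
total_size = 12

# drop_last=False: repeat the first indices so the list divides evenly
padded = indices + indices[: total_size - len(indices)]
print(padded)     # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1]

# drop_last=True with a total_size of 8: drop the tail instead
truncated = indices[:8]
print(truncated)  # [0, 1, 2, 3, 4, 5, 6, 7]
assert len(padded) == total_size and len(truncated) == 8
```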
@@ -928,7 +928,7 @@ def _get_learning_rate(self):
def _secs2timedelta(secs):
"""
-convert seconds to hh:mm:ss.msec, msecs rounded to 2 decimals
+Convert seconds to hh:mm:ss.msec, msecs rounded to 2 decimal places.
"""
msec = int(abs(secs - int(secs)) * 100)
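
The diff shows only the first line of `_secs2timedelta`'s body; the sketch below completes it under my own assumption about the return value (the return line is not quoted from the file).

```python
import datetime

def secs2timedelta_sketch(secs):
    # Two decimal places of the fractional second, as in the line shown above.
    msec = int(abs(secs - int(secs)) * 100)
    # Assumed completion: render the whole seconds as a timedelta and append
    # the two rounded decimals.
    return f"{datetime.timedelta(seconds=int(secs))}.{msec:02d}"

print(secs2timedelta_sketch(3661.257))  # 1:01:01.25
```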
@@ -937,7 +937,7 @@ def _secs2timedelta(secs):
def metrics_format(self, metrics: dict[str, float]) -> dict[str, float]:
"""
-Reformat Trainer metrics values to a human-readable format
+Reformat Trainer metrics values to a human-readable format.
Args:
metrics (`Dict[str, float]`):
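
For orientation, a hedged usage sketch for `metrics_format` (it assumes an already-constructed `Trainer` instance named `trainer`; the exact formatting rules are not visible in this hunk):

```python
# `trainer` is assumed to be an existing transformers.Trainer instance.
raw = {"train_runtime": 123.4567, "train_samples_per_second": 8.9012}
pretty = trainer.metrics_format(raw)
print(pretty)  # values rewritten for display; see the function body for the exact rules
```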
@@ -963,7 +963,7 @@ def metrics_format(self, metrics: dict[str, float]) -> dict[str, float]:
def log_metrics(self, split, metrics):
"""
-Log metrics in a specially formatted way
+Log metrics in a specially formatted way.
Under distributed environment this is done only for a process with rank 0.
@@ -977,7 +977,7 @@ def log_metrics(self, split, metrics):
In order to get memory usage report you need to install `psutil`. You can do that with `pip install psutil`.
-Now when this method is run, you will see a report that will include: :
+Now when this method is run, you will see a report that will include:
```
init_mem_cpu_alloc_delta = 1301MB
@@ -1006,7 +1006,7 @@ def log_metrics(self, split, metrics):
The reporting happens only for process of rank 0 and gpu 0 (if there is a gpu). Typically this is enough since the
main process does the bulk of work, but it could be not quite so if model parallel is used and then other GPUs may
use a different amount of gpu memory. This is also not the same under DataParallel where gpu0 may require much more
-memory than the rest since it stores the gradient and optimizer states for all participating GPUS. Perhaps in the
+memory than the rest since it stores the gradient and optimizer states for all participating GPUs. Perhaps in the
future these reports will evolve to measure those too.
The CPU RAM metric measures RSS (Resident Set Size) includes both the memory which is unique to the process and the
@@ -1091,7 +1091,7 @@ def save_metrics(self, split, metrics, combined=True):
def save_state(self):
"""
-Saves the Trainer state, since Trainer.save_model saves only the tokenizer with the model
+Saves the Trainer state, since Trainer.save_model saves only the tokenizer with the model.
Under distributed environment this is done only for a process with rank 0.
"""
@@ -1104,7 +1104,7 @@ def save_state(self):
def get_model_param_count(model, trainable_only=False):
"""
-Calculate model's total param count. If trainable_only is True then count only those requiring grads
+Calculate model's total param count. If trainable_only is True then count only those requiring grads.
"""
if is_deepspeed_zero3_enabled():
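
The body is cut off right after the DeepSpeed ZeRO-3 check; under ZeRO-3 parameters are partitioned and assumed to need special handling, but the ordinary path reduces to the sketch below (my own sketch, not the function body).

```python
import torch.nn as nn

# Sketch of the non-ZeRO-3 path: sum numel() over parameters, optionally
# restricted to those with requires_grad=True.
def param_count_sketch(model: nn.Module, trainable_only: bool = False) -> int:
    return sum(
        p.numel() for p in model.parameters() if not trainable_only or p.requires_grad
    )

model = nn.Linear(10, 2)                               # 10 * 2 weights + 2 biases
print(param_count_sketch(model))                       # 22
print(param_count_sketch(model, trainable_only=True))  # 22 (all params require grad)
```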