fix typos in the tests directory (#36717)

parent cbfb8d7b27
commit 7f5077e536

@@ -18,7 +18,7 @@
 #
 # --variations allows you to compare variations in multiple dimensions.
 #
-# as the first dimention has 2 options and the second 3 in our example, this will run the trainer 6
+# as the first dimension has 2 options and the second 3 in our example, this will run the trainer 6
 # times adding one of:
 #
 # 1. --tf32 0 --fp16 0
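
The six runs come from the Cartesian product of the two dimensions (2 * 3 = 6). A minimal sketch of that enumeration, with illustrative option values:

    import itertools

    # first dimension has 2 options, second has 3 -> 2 * 3 = 6 trainer invocations
    variations = [["--tf32 0", "--tf32 1"], ["--fp16 0", "--fp16 1", "--bf16 1"]]
    for combo in itertools.product(*variations):
        print(" ".join(combo))  # first line: "--tf32 0 --fp16 0"
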
@@ -405,7 +405,7 @@ class CoreIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
         self.assertFalse(torch.allclose(good_deepspeed_sin_cos, bad_deepspeed_sin_cos))
         torch.testing.assert_close(good_torch_sin_cos, good_deepspeed_sin_cos.cpu())

-        # Finally, we can see that the incorrect pattern is okay on vanilla torch, demostrating that this issue is
+        # Finally, we can see that the incorrect pattern is okay on vanilla torch, demonstrating that this issue is
         # exclusive to DeepSpeed
         bad_torch_sin_cos = bad_deepspeed_create_sinusoidal_positions(
             model.config.max_position_embeddings, model.config.rotary_dim
@@ -193,7 +193,7 @@ class GenerationConfigTest(unittest.TestCase):
             generation_config_bad_temperature.update(temperature=None)
         self.assertEqual(len(captured_warnings), 0)

-        # Impossible sets of contraints/parameters will raise an exception
+        # Impossible sets of constraints/parameters will raise an exception
         with self.assertRaises(ValueError):
             GenerationConfig(do_sample=False, num_beams=1, num_return_sequences=2)
         with self.assertRaises(ValueError):
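
For context on the asserted error: greedy decoding (`do_sample=False`, `num_beams=1`) can only ever produce a single candidate, so requesting `num_return_sequences=2` is self-contradictory and the config validation raises. The same check can be triggered directly:

    from transformers import GenerationConfig

    try:
        GenerationConfig(do_sample=False, num_beams=1, num_return_sequences=2)
    except ValueError as err:
        # greedy search cannot return more sequences than it explores
        print(err)
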
@@ -751,7 +751,7 @@ class LogitsProcessorTest(unittest.TestCase):
         scores = self._get_uniform_logits(batch_size, vocab_size)
         processed_scores = logits_processor(input_ids, scores)
         self.assertTrue(torch.isneginf(processed_scores[:, bos_token_id + 1 :]).all())
-        # score for bos_token_id shold be zero
+        # score for bos_token_id should be zero
         self.assertListEqual(processed_scores[:, bos_token_id].tolist(), 4 * [0])

         # processor should not change logits in-place
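
The processor exercised here forces BOS as the first generated token: every score except `bos_token_id` is pushed to -inf (probability 0) while the BOS score is left at 0 (log-probability of 1). A minimal sketch of that masking; `force_bos` is a hypothetical stand-in for the real processor:

    import torch

    def force_bos(scores: torch.Tensor, bos_token_id: int) -> torch.Tensor:
        # everything except BOS becomes -inf; BOS keeps log-prob 0 (out of place, as the test expects)
        masked = torch.full_like(scores, float("-inf"))
        masked[:, bos_token_id] = 0.0
        return masked

    scores = torch.zeros(4, 20)  # batch of 4, vocab of 20
    out = force_bos(scores, bos_token_id=0)
    assert torch.isneginf(out[:, 1:]).all() and out[:, 0].tolist() == 4 * [0]
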
@@ -972,7 +972,7 @@ class LogitsProcessorTest(unittest.TestCase):

         watermark = WatermarkLogitsProcessor(vocab_size=vocab_size, device=input_ids.device)

-        # use fixed id for last token, needed for reprodicibility and tests
+        # use fixed id for last token, needed for reproducibility and tests
         input_ids[:, -1] = 10
         scores_wo_bias = scores[:, -1].clone()
         out = watermark(input_ids=input_ids, scores=scores)
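
Fixing the last input id matters because the watermark derives its biased ("green") vocabulary subset from a hash of the preceding token; with a random last token that subset, and hence the scores, would change between runs. A simplified sketch of the seeding idea, with an illustrative hash constant (not necessarily the library's exact scheme):

    import torch

    def green_token_ids(prev_token: int, vocab_size: int, gamma: float = 0.25) -> torch.Tensor:
        # seed an RNG from the previous token id so the biased subset is reproducible
        gen = torch.Generator().manual_seed(prev_token * 15485863)  # illustrative constant
        perm = torch.randperm(vocab_size, generator=gen)
        return perm[: int(gamma * vocab_size)]  # the "green" tokens that get a score bias

    # same previous token -> same subset, which is what fixing input_ids[:, -1] buys the test
    assert torch.equal(green_token_ids(10, 100), green_token_ids(10, 100))
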
@@ -256,7 +256,7 @@ class StoppingCriteriaTestCase(unittest.TestCase):
             ]
         )

-        # trigger stopping when at leat one criteria is satisfied, one value per batch
+        # trigger stopping when at least one criteria is satisfied, one value per batch
         self.assertTrue(criteria(inputs["input_ids"], scores))

         # return False when neither is satisfied
@@ -283,7 +283,7 @@ class StoppingCriteriaTestCase(unittest.TestCase):
             ]
         )

-        # trigger stopping when at leat one criteria is satisfied
+        # trigger stopping when at least one criteria is satisfied
         self.assertListEqual(criteria(inputs["input_ids"], scores).tolist(), [True, False, False])

         # False when neither is satisfied
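
Both hunks in this file exercise the same contract: a stopping-criteria list returns one boolean per batch row, and a row stops as soon as at least one criterion fires for it. A minimal sketch of that OR-reduction over hypothetical criterion outputs:

    import torch

    def any_criterion_met(results: list[torch.Tensor]) -> torch.Tensor:
        # OR across criteria: (num_criteria, batch) -> (batch,)
        return torch.stack(results).any(dim=0)

    max_length_hit = torch.tensor([True, False, False])    # e.g. row 0 reached max length
    stop_string_hit = torch.tensor([False, False, False])  # e.g. no row produced the stop string
    print(any_criterion_met([max_length_hit, stop_string_hit]))  # tensor([ True, False, False])
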
@@ -173,7 +173,7 @@ class GenerationTesterMixin:
     def _check_similar_generate_outputs(self, output_1, output_2, atol=1e-5, rtol=1e-5):
         """
         Checks whether a pair of generate outputs are similar. Two `generate` call outputs are considered similar in
-        the following siturations:
+        the following situations:
         1. The sequences are the same
         2. The sequences are different, but the scores up to (and including) the first mismatch are nearly identical
         """
@@ -1617,7 +1617,7 @@ class GenerationTesterMixin:
         embed_dim = getattr(text_config, "d_model", text_config.hidden_size)
         per_head_embed_dim = embed_dim // num_attention_heads

-        # some models have diffent num-head for query vs key/value so we need to assign correct value
+        # some models have different num-head for query vs key/value so we need to assign correct value
         # BUT only after `per_head_embed_dim` is set
         num_attention_heads = (
             text_config.num_key_value_heads
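
The comment being fixed here matters for grouped-query attention models: `per_head_embed_dim` must be derived from the number of query heads, while the key/value cache is shaped with `num_key_value_heads`. A worked example with assumed Llama-like sizes:

    embed_dim = 4096
    num_attention_heads = 32  # query heads
    num_key_value_heads = 8   # key/value heads (grouped-query attention)

    per_head_embed_dim = embed_dim // num_attention_heads  # 4096 // 32 = 128
    # a cache entry is shaped with the KV head count, not the query head count:
    expected_cache_shape = (2, num_key_value_heads, 10, per_head_embed_dim)  # (batch, heads, seq, dim)
    print(per_head_embed_dim, expected_cache_shape)  # 128 (2, 8, 10, 128)
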
@@ -2316,7 +2316,7 @@ class GenerationTesterMixin:
     def _test_attention_implementation(self, attn_implementation):
         """
         Compares the output of generate with the eager attention implementation against other implementations.
-        NOTE: despite the test logic being the same, different implementations actually need diferent decorators, hence
+        NOTE: despite the test logic being the same, different implementations actually need different decorators, hence
         this separate function.
         """
         max_new_tokens = 30
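
The implementation under test is selected when the model is loaded; only the hardware-specific decorators differ between the callers of this helper. A sketch of such a comparison, using an assumed tiny model id, with the caveat that exact agreement is not guaranteed for every model:

    import torch
    from transformers import AutoModelForCausalLM

    model_id = "hf-internal-testing/tiny-random-gpt2"  # assumed stand-in
    input_ids = torch.tensor([[1, 2, 3, 4]])

    outputs = {}
    for impl in ("eager", "sdpa"):
        model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation=impl)
        outputs[impl] = model.generate(input_ids, max_new_tokens=30, do_sample=False)
    print(torch.equal(outputs["eager"], outputs["sdpa"]))  # greedy outputs should (nearly) match
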
@@ -4619,7 +4619,7 @@ class GenerationIntegrationTests(unittest.TestCase):
         self.assertTrue(diff < 1e-4)

     def test_generate_input_ids_as_kwarg(self):
-        """Test that `input_ids` work equaly as a positional and keyword argument in decoder-only models"""
+        """Test that `input_ids` work equally as a positional and keyword argument in decoder-only models"""
         article = "I need input_ids to generate"
         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
         model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2", max_length=15)
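
Seen from the caller's side, the equivalence the decoder-only test asserts is simply:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")

    input_ids = tokenizer("I need input_ids to generate", return_tensors="pt").input_ids
    out_positional = model.generate(input_ids, max_length=15)
    out_keyword = model.generate(input_ids=input_ids, max_length=15)
    assert torch.equal(out_positional, out_keyword)  # same sequences either way
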
@@ -4636,7 +4636,7 @@ class GenerationIntegrationTests(unittest.TestCase):
         self.assertEqual(output_sequences.shape, (1, 15))

     def test_generate_input_ids_as_encoder_kwarg(self):
-        """Test that `input_ids` work equaly as a positional and keyword argument in encoder-decoder models"""
+        """Test that `input_ids` work equally as a positional and keyword argument in encoder-decoder models"""
         article = "Justin Timberlake and Jessica Biel, welcome to parenthood."
         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
         model = AutoModelForSeq2SeqLM.from_pretrained("hf-internal-testing/tiny-random-bart")
@@ -35,7 +35,7 @@ if is_torch_available():

 class TestTensorParallel(TestCasePlus):
     def torchrun(self, script: str):
-        """Run the `script` using `torchrun` command for multi-processing in a subprocess. Captures errors as necesary."""
+        """Run the `script` using `torchrun` command for multi-processing in a subprocess. Captures errors as necessary."""
         with tempfile.NamedTemporaryFile(mode="w+", suffix=".py") as tmp:
             tmp.write(script)
             tmp.flush()
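
The helper's pattern is to write the generated test script to a temporary file and hand it to the `torchrun` launcher. A self-contained sketch of that flow (process count and error surfacing are illustrative):

    import subprocess
    import tempfile

    def run_with_torchrun(script: str, nproc_per_node: int = 2) -> None:
        # persist the script, launch it under torchrun, and surface any failure
        with tempfile.NamedTemporaryFile(mode="w+", suffix=".py") as tmp:
            tmp.write(script)
            tmp.flush()
            cmd = ["torchrun", f"--nproc_per_node={nproc_per_node}", tmp.name]
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                raise RuntimeError(f"torchrun failed:\n{result.stderr}")
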
@@ -599,7 +599,7 @@ class TFModelTesterMixin:
             if model.config.is_encoder_decoder:
                 signature = inspect.signature(model.call)
                 arg_names = [*signature.parameters.keys()]
-                if "decoder_head_mask" in arg_names:  # necessary diferentiation because of T5 model
+                if "decoder_head_mask" in arg_names:  # necessary differentiation because of T5 model
                     inputs["decoder_head_mask"] = head_mask
                 if "cross_attn_head_mask" in arg_names:
                     inputs["cross_attn_head_mask"] = head_mask
@@ -241,7 +241,7 @@ def bytes2megabytes(x):
     return int(x / 2**20)


-# Copied from acclerate: https://github.com/huggingface/accelerate/blob/ee163b66fb7848892519e804688cb4ae981aacbe/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py#L40C1-L73C68
+# Copied from accelerate: https://github.com/huggingface/accelerate/blob/ee163b66fb7848892519e804688cb4ae981aacbe/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py#L40C1-L73C68
 class TorchTracemalloc:
     def __enter__(self):
         gc.collect()
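
`TorchTracemalloc` (per the accelerate link in the hunk) snapshots GPU memory around a block of code; `bytes2megabytes` just converts with 2**20 bytes per MiB. A condensed sketch of the context-manager pattern:

    import gc
    import torch

    def bytes2megabytes(x):
        return int(x / 2**20)  # 2**20 = 1,048,576 bytes per MiB

    class PeakMemoryTracker:
        # condensed sketch of a TorchTracemalloc-style tracker (CUDA required)
        def __enter__(self):
            gc.collect()
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()
            self.begin = torch.cuda.memory_allocated()
            return self

        def __exit__(self, *exc):
            gc.collect()
            self.used = bytes2megabytes(torch.cuda.memory_allocated() - self.begin)
            self.peaked = bytes2megabytes(torch.cuda.max_memory_allocated() - self.begin)
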
@@ -4086,7 +4086,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         # Functional check
         self.assertAlmostEqual(loss, orig_loss)

-        # AOT Autograd recomputaion and nvfuser recomputation optimization
+        # AOT Autograd recomputation and nvfuser recomputation optimization
         # aggressively fuses the operations and reduce the memory footprint.
         self.assertGreater(orig_peak_mem, peak_mem * 2)

@@ -186,7 +186,7 @@ class Seq2seqTrainerTester(TestCasePlus):

     @require_torch
     def test_bad_generation_config_fail_early(self):
-        # Tests that a bad geneartion config causes the trainer to fail early
+        # Tests that a bad generation config causes the trainer to fail early
         model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
         tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
         data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, return_tensors="pt", padding="longest")
@@ -436,7 +436,7 @@ NEW_BERT_CONSTANT = "value"

         self.init_file(file_name, bert_test)
         duplicate_module(file_name, bert_model_patterns, new_bert_model_patterns)
-        # There should not be a new Copied from statement, the old one should be adapated.
+        # There should not be a new Copied from statement, the old one should be adapted.
         self.check_result(dest_file_name, bert_expected)

         self.init_file(file_name, bert_test)
@@ -996,7 +996,7 @@ class UtilFunctionTester(unittest.TestCase):
         image = np.random.randint(0, 256, (3, 32, 64))
         self.assertEqual(get_image_size(image), (32, 64))

-        # Test the channel dimension can be overriden
+        # Test the channel dimension can be overridden
         image = np.random.randint(0, 256, (3, 32, 64))
         self.assertEqual(get_image_size(image, channel_dim=ChannelDimension.LAST), (3, 32))

@@ -411,7 +411,7 @@ class RopeTest(unittest.TestCase):
         self.assertEqual(attention_scale, 1.0)

         # Check 2: based on `low_freq_factor` and `high_freq_factor`, the frequencies will be scaled between 1 and
-        # `factor` (similar to yarn). Low frequencies get scaled by `factor`, high frequences see no change, medium
+        # `factor` (similar to yarn). Low frequencies get scaled by `factor`, high frequencies see no change, medium
         # frequencies are scaled by a value in between. Changing `low_freq_factor` and `high_freq_factor` changes what
         # is considered low, medium, and high frequencies.
         factor = 10.0
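
The rule the comment describes is piecewise over wavelength, as in Llama-3-style RoPE rescaling: long-wavelength (low-frequency) components are divided by `factor`, short-wavelength (high-frequency) ones are untouched, and the band in between is interpolated. A hedged, scalar sketch (default values assumed; the library implementation is vectorized):

    import math

    def scale_inv_freq(inv_freq, factor=10.0, low_freq_factor=1.0,
                       high_freq_factor=4.0, old_context_len=8192):
        low_freq_wavelen = old_context_len / low_freq_factor
        high_freq_wavelen = old_context_len / high_freq_factor
        scaled = []
        for f in inv_freq:
            wavelen = 2 * math.pi / f
            if wavelen < high_freq_wavelen:      # high frequency: unchanged
                scaled.append(f)
            elif wavelen > low_freq_wavelen:     # low frequency: scaled by `factor`
                scaled.append(f / factor)
            else:                                # medium: smooth interpolation between the two
                smooth = (old_context_len / wavelen - low_freq_factor) / (
                    high_freq_factor - low_freq_factor
                )
                scaled.append((1 - smooth) * f / factor + smooth * f)
        return scaled
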
@@ -1686,7 +1686,7 @@ class ModelUtilsTest(TestCasePlus):
     def test_isin_mps_friendly(self):
         """tests that our custom `isin_mps_friendly` matches `torch.isin`"""
         random_ids = torch.randint(0, 100, (100,))
-        # We can match against an interger
+        # We can match against an integer
         random_test_integer = torch.randint(0, 100, (1,)).item()
         self.assertTrue(
             torch.equal(
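
`torch.isin` long lacked an MPS kernel, which is why the custom fallback exists. A minimal broadcast-based equivalent of the behavior being tested (illustrative, not the library's exact code):

    import torch

    def isin_broadcast(elements: torch.Tensor, test_elements: torch.Tensor) -> torch.Tensor:
        # compare each element against every test element, then reduce with any()
        return (elements.unsqueeze(-1) == test_elements.reshape(-1)).any(dim=-1)

    random_ids = torch.randint(0, 100, (100,))
    random_test_integer = torch.randint(0, 100, (1,)).item()
    assert torch.equal(
        isin_broadcast(random_ids, torch.tensor(random_test_integer)),
        torch.isin(random_ids, random_test_integer),
    )
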
@@ -1911,7 +1911,7 @@ class ModelUtilsTest(TestCasePlus):
     @require_torch_gpu
     def test_loading_is_fast_on_gpu(self, model_id: str, max_loading_time: float):
         """
-        This test is used to avoid regresion on https://github.com/huggingface/transformers/pull/36380.
+        This test is used to avoid regression on https://github.com/huggingface/transformers/pull/36380.
         10s should be more than enough for both models, and allows for some margin as loading time are quite
         unstable. Before #36380, it used to take more than 40s, so 10s is still reasonable.
         Note that we run this test in a subprocess, to ensure that cuda is not already initialized/warmed-up.