CI: AMD MI300 tests fix (#30797)
* add fix
* update import
* updated dicts and comments
* remove prints
* Update testing_utils.py
Parent: a755745546
Commit: 7a4792e6b3
@@ -166,6 +166,15 @@ ENDPOINT_STAGING = "https://hub-ci.huggingface.co"
 # Not critical, only usable on the sandboxed CI instance.
 TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL"
 
+if is_torch_available():
+    import torch
+
+    IS_ROCM_SYSTEM = torch.version.hip is not None
+    IS_CUDA_SYSTEM = torch.version.cuda is not None
+else:
+    IS_ROCM_SYSTEM = False
+    IS_CUDA_SYSTEM = False
+
 
 def parse_flag_from_env(key, default=False):
     try:
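The two flags above are what the test changes below key off. A minimal consumption sketch, assuming `IS_ROCM_SYSTEM`/`IS_CUDA_SYSTEM` are importable from `transformers.testing_utils` as this hunk defines them:

from transformers.testing_utils import IS_CUDA_SYSTEM, IS_ROCM_SYSTEM

# ROCm kernels may round differently from CUDA ones, so integration tests
# relax the absolute tolerance when running on an AMD system.
atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5
print(f"cuda={IS_CUDA_SYSTEM} rocm={IS_ROCM_SYSTEM} atol={atol}")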
@@ -1327,4 +1327,9 @@ class BarkModelIntegrationTests(unittest.TestCase):
         output_with_offload = self.model.generate(**input_ids, do_sample=False, temperature=1.0)
 
         # checks if same output
-        self.assertListEqual(output_with_no_offload.tolist(), output_with_offload.tolist())
+        self.assertListAlmostEqual(output_with_no_offload.squeeze().tolist(), output_with_offload.squeeze().tolist())
+
+    def assertListAlmostEqual(self, list1, list2, tol=1e-6):
+        self.assertEqual(len(list1), len(list2))
+        for a, b in zip(list1, list2):
+            self.assertAlmostEqual(a, b, delta=tol)
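Exact `assertListEqual` fails when the offloaded and non-offloaded generation paths differ only in low-order floating-point bits. A self-contained sketch of the tolerant helper added above (the test class here is hypothetical, not part of the diff):

import unittest


class OffloadParityTest(unittest.TestCase):
    # Same element-wise comparison the diff adds to BarkModelIntegrationTests.
    def assertListAlmostEqual(self, list1, list2, tol=1e-6):
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, delta=tol)

    def test_close_lists_pass(self):
        # Differs by 5e-7 per element: exact equality would fail, this passes.
        self.assertListAlmostEqual([1.0, 2.0], [1.0 + 5e-7, 2.0 - 5e-7])


if __name__ == "__main__":
    unittest.main()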
@@ -601,6 +601,11 @@ class GemmaIntegrationTest(unittest.TestCase):
     @require_read_token
     def test_model_2b_bf16(self):
         model_id = "google/gemma-2b"
+
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXTS = {
             7: [
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music",
@@ -610,6 +615,10 @@ class GemmaIntegrationTest(unittest.TestCase):
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music",
                 "Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
             ],
+            9: [
+                "Hello I am doing a project on the 1990s and I need to know what the most popular music",
+                "Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
+            ],
         }
 
         model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
@@ -627,6 +636,11 @@ class GemmaIntegrationTest(unittest.TestCase):
     @require_read_token
     def test_model_2b_eager(self):
         model_id = "google/gemma-2b"
+
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXTS = {
             7: [
                 "Hello I am doing a project on the 1990s and I am looking for some information on the ",
@@ -636,6 +650,10 @@ class GemmaIntegrationTest(unittest.TestCase):
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music",
                 "Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
             ],
+            9: [
+                "Hello I am doing a project on the 1990s and I need to know what the most popular music",
+                "Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
+            ],
         }
 
         model = AutoModelForCausalLM.from_pretrained(
@@ -655,6 +673,11 @@ class GemmaIntegrationTest(unittest.TestCase):
     @require_read_token
     def test_model_2b_sdpa(self):
         model_id = "google/gemma-2b"
+
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXTS = {
             7: [
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music",
@@ -664,6 +687,10 @@ class GemmaIntegrationTest(unittest.TestCase):
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music",
                 "Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
             ],
+            9: [
+                "Hello I am doing a project on the 1990s and I need to know what the most popular music",
+                "Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
+            ],
         }
 
         model = AutoModelForCausalLM.from_pretrained(
@@ -763,6 +790,11 @@ class GemmaIntegrationTest(unittest.TestCase):
     @require_read_token
     def test_model_7b_bf16(self):
         model_id = "google/gemma-7b"
+
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXTS = {
             7: [
                 """Hello I am doing a project on a 1991 240sx and I am trying to find""",
@@ -772,6 +804,10 @@ class GemmaIntegrationTest(unittest.TestCase):
                 "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file",
                 "Hi today I am going to show you how to make a very simple and easy to make a very simple and",
             ],
+            9: [
+                "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees",
+                "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",
+            ],
         }
 
         model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
@@ -845,6 +881,11 @@ class GemmaIntegrationTest(unittest.TestCase):
         NUM_TOKENS_TO_GENERATE = 40
         # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
         # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
+        #
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXT_COMPLETION = {
             8: [
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found",
@@ -854,6 +895,10 @@ class GemmaIntegrationTest(unittest.TestCase):
                 "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found",
                 "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the",
             ],
+            9: [
+                "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found",
+                "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the",
+            ],
         }
 
         prompts = ["Hello I am doing", "Hi today"]
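Every Gemma hunk above follows one pattern: expected outputs live in a dict keyed by the GPU's major compute capability (7 = T4, 8 = A100/A10, 9 = MI300). A hedged sketch of the selection, assuming the key comes from `torch.cuda.get_device_capability()`, consistent with the `cuda_compute_capability_major_version` indexing in the Mistral test below; on ROCm builds an MI300 reports major version 9:

import torch

EXPECTED_TEXTS = {
    7: ["...T4 reference output..."],       # placeholders, not real references
    8: ["...A100/A10 reference output..."],
    9: ["...MI300 reference output..."],
}

# Needs a visible GPU; fall back to the CUDA reference key otherwise.
major = torch.cuda.get_device_capability()[0] if torch.cuda.is_available() else 8
expected = EXPECTED_TEXTS[major]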
@@ -26,7 +26,7 @@ import requests
 
 from transformers import AutoModelForVision2Seq, AutoProcessor, Kosmos2Config
 from transformers.models.kosmos2.configuration_kosmos2 import Kosmos2TextConfig, Kosmos2VisionConfig
-from transformers.testing_utils import require_torch, require_vision, slow, torch_device
+from transformers.testing_utils import IS_ROCM_SYSTEM, require_torch, require_vision, slow, torch_device
 from transformers.utils import is_torch_available, is_vision_available
 
 from ...test_configuration_common import ConfigTester
@@ -562,6 +562,8 @@ class Kosmos2ModelIntegrationTest(unittest.TestCase):
         processed_text = processed_text[0]
         final_text, entities = final_text_with_entities[0]
 
+        atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5
+
         np.testing.assert_allclose(
             torch.concat(scores[1:4])[:3, :3].to("cpu").numpy(),
             np.array(
@@ -571,7 +573,7 @@ class Kosmos2ModelIntegrationTest(unittest.TestCase):
                     [-0.9352350831031799, -4.688288688659668, 6.240612983703613],
                 ]
             ),
-            atol=1e-5,
+            atol=atol,
         )
         np.testing.assert_allclose(
             torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(),
@@ -629,7 +631,7 @@ class Kosmos2ModelIntegrationTest(unittest.TestCase):
                     [-0.7624598741531372, -4.771658897399902, 6.576295852661133],
                 ]
             ),
-            atol=1e-5,
+            atol=atol,
         )
         np.testing.assert_allclose(
             torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(),
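The Kosmos-2 hunks hoist the tolerance into one `atol` chosen per system and reuse it across every `assert_allclose` in the test. The same pattern in isolation (array values are stand-ins):

import numpy as np

from transformers.testing_utils import IS_ROCM_SYSTEM

atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5  # looser on AMD, as in the diff

actual = np.array([1.000005, 2.0])   # stand-in for model scores
expected = np.array([1.0, 2.0])      # stand-in for reference values
np.testing.assert_allclose(actual, expected, atol=atol)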
@@ -715,6 +715,11 @@ class LlamaIntegrationTest(unittest.TestCase):
         NUM_TOKENS_TO_GENERATE = 40
         # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
         # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
+        #
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXT_COMPLETION = {
             8: [
                 "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
@@ -730,7 +735,15 @@ class LlamaIntegrationTest(unittest.TestCase):
                 "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, "
                 "and even on a good old fashioned cheeseburger. I love it on everything. I love it so",
             ],
+            9: [
+                "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial"
+                " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
+                "theory of relativ",
+                "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs,"
+                " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
+            ],
         }
+        expected_text_completion_idx = 8
 
         prompts = [
             "Simply put, the theory of relativity states that ",
@@ -745,7 +758,9 @@ class LlamaIntegrationTest(unittest.TestCase):
         # Dynamic Cache
         generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
         dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-        self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text)  # Both GPU architectures have the same output
+        self.assertEqual(
+            EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text
+        )  # Both GPU architectures have the same output
 
         # Static Cache
         generated_ids = model.generate(
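Since keys 8 and 9 hold identical Llama strings, the diff routes the dynamic-cache assertion through `expected_text_completion_idx` rather than a hard-coded `8`. The same indirection reduced to a sketch:

# One index variable instead of a repeated literal key: switching the
# reference architecture later touches a single line.
EXPECTED_TEXT_COMPLETION = {8: ["same text"], 9: ["same text"]}
expected_text_completion_idx = 8

dynamic_text = ["same text"]  # stand-in for tokenizer.batch_decode(...)
assert EXPECTED_TEXT_COMPLETION[expected_text_completion_idx] == dynamic_text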
@@ -520,12 +520,16 @@ class MistralIntegrationTest(unittest.TestCase):
         EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]])
         torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)
 
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in output.
         EXPECTED_SLICE = {
             7: torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]),
             8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -1.0781, 1.7568, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711]),
+            9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]),
         }  # fmt: skip
 
-        print(out[0, 0, :30])
         torch.testing.assert_close(
             out[0, 0, :30], EXPECTED_SLICE[self.cuda_compute_capability_major_version], atol=1e-4, rtol=1e-4
         )
@@ -625,9 +629,14 @@ class MistralIntegrationTest(unittest.TestCase):
 
     @slow
     def test_speculative_generation(self):
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXT_COMPLETION = {
             7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs",
             8: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
+            9: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
         }
         prompt = "My favourite condiment is "
         tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False)
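The Mistral slice check indexes the per-device dict with `self.cuda_compute_capability_major_version`. A reduced, CPU-safe sketch of the comparison (tensor contents are placeholders, not real logits):

import torch

out = torch.zeros(1, 1, 30)  # stand-in for a model forward pass

EXPECTED_SLICE = {
    7: torch.zeros(30),  # T4 reference (placeholder)
    8: torch.zeros(30),  # A100/A10 reference (placeholder)
    9: torch.zeros(30),  # MI300 reference (placeholder)
}

major = torch.cuda.get_device_capability()[0] if torch.cuda.is_available() else 8
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE[major], atol=1e-4, rtol=1e-4)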
@@ -553,6 +553,10 @@ class MixtralIntegrationTest(unittest.TestCase):
         )
         # TODO: might need to tweak it in case the logits do not match on our daily runners
         # these logits have been obtained with the original megablocks impelmentation.
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in output.
         EXPECTED_LOGITS = {
             7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to(
                 torch_device
@@ -560,6 +564,9 @@ class MixtralIntegrationTest(unittest.TestCase):
             8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to(
                 torch_device
             ),
+            9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8906, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to(
+                torch_device
+            ),
         }
         with torch.no_grad():
             logits = model(dummy_input).logits
@@ -583,6 +590,11 @@ class MixtralIntegrationTest(unittest.TestCase):
         )
 
         # TODO: might need to tweak it in case the logits do not match on our daily runners
+        #
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
+        # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_LOGITS_LEFT = {
             7: torch.Tensor(
                 [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]],
@@ -590,6 +602,9 @@ class MixtralIntegrationTest(unittest.TestCase):
             8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to(
                 torch_device
             ),
+            9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to(
+                torch_device
+            ),
         }
 
         EXPECTED_LOGITS_LEFT_UNPADDED = {
@@ -599,6 +614,9 @@ class MixtralIntegrationTest(unittest.TestCase):
             8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to(
                 torch_device
             ),
+            9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to(
+                torch_device
+            ),
         }
 
         EXPECTED_LOGITS_RIGHT_UNPADDED = {
@@ -608,6 +626,9 @@ class MixtralIntegrationTest(unittest.TestCase):
             8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(
                 torch_device
             ),
+            9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to(
+                torch_device
+            ),
         }
 
         with torch.no_grad():
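Each Mixtral expectation dict gains its MI300 entry the same way the CUDA entries are built: construct the reference tensor, then move it to `torch_device`. A minimal sketch of extending such a dict (values are placeholders):

import torch

torch_device = "cuda" if torch.cuda.is_available() else "cpu"

EXPECTED_LOGITS = {
    8: torch.Tensor([[0.16, 0.16, 0.61]]).to(torch_device),  # CUDA reference
    # New MI300 entry: same construction, values re-captured on that hardware.
    9: torch.Tensor([[0.16, 0.16, 0.61]]).to(torch_device),
}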
@@ -26,7 +26,14 @@ import numpy as np
 from datasets import load_dataset
 
 from transformers import PerceiverConfig
-from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device
+from transformers.testing_utils import (
+    IS_ROCM_SYSTEM,
+    require_torch,
+    require_torch_multi_gpu,
+    require_vision,
+    slow,
+    torch_device,
+)
 from transformers.utils import is_torch_available, is_vision_available
 
 from ...test_configuration_common import ConfigTester
@@ -930,7 +937,8 @@ class PerceiverModelIntegrationTest(unittest.TestCase):
 
         expected_slice = torch.tensor([-1.1652, -0.1992, -0.7520], device=torch_device)
 
-        self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=1e-4))
+        atol = 1e-3 if IS_ROCM_SYSTEM else 1e-4
+        self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=atol))
 
     @slow
     def test_inference_image_classification_fourier(self):
@@ -2551,7 +2551,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         )
         eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
 
-        training_args = TrainingArguments(output_dir="./examples", use_cpu=True)
+        training_args = TrainingArguments(output_dir="./examples", use_cpu=True, report_to="none")
         trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset)
         result = trainer.evaluate()
         self.assertLess(result["eval_loss"], 0.2)
@@ -2572,6 +2572,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             output_dir="./examples",
             use_cpu=True,
             per_device_eval_batch_size=1,
+            report_to="none",
         )
         trainer = Trainer(
             model=model,
@@ -3107,6 +3108,8 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             "--predict_with_generate",
             "--ddp_timeout",
             "60",
+            "--report_to",
+            "none",
         ]
         execute_subprocess_async(command)
         # successful return here == success - any errors would have caused an error or a timeout in the sub-call
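`report_to="none"` switches off integration reporters (wandb, tensorboard, and so on) so sandboxed CI runners never try to reach external services; the subprocess test passes the CLI equivalent `--report_to none`. Minimal sketch:

from transformers import TrainingArguments

args = TrainingArguments(output_dir="./examples", use_cpu=True, report_to="none")
print(args.report_to)  # normalized to an empty list of reporters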
@@ -161,7 +161,6 @@ class Seq2seqTrainerTester(TestCasePlus):
             tokenizer=tokenizer,
             data_collator=data_collator,
             compute_metrics=lambda x: {"samples": x[0].shape[0]},
-            report_to="none",
         )
 
         def prepare_data(examples):