Merge branch 'main' into better-from-pretrained-return-typehint

Pavel Iakubovskii 2025-07-03 15:10:18 +01:00 committed by GitHub
commit 92c911980b
63 changed files with 1186 additions and 544 deletions

View File

@ -84,8 +84,6 @@ jobs:
machine_type: ${{ matrix.machine_type }}
folder_slices: ${{ needs.setup.outputs.folder_slices }}
runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
report_name_prefix: run_models_gpu
secrets: inherit
run_trainer_and_fsdp_gpu:
@ -104,11 +102,10 @@ jobs:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
report_name_prefix: run_trainer_and_fsdp_gpu
secrets: inherit
run_pipelines_gpu:
if: ${{ inputs.job == 'run_pipelines_gpu' }}
run_pipelines_torch_gpu:
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
name: Pipelines
strategy:
fail-fast: false
@ -161,20 +158,20 @@ jobs:
- name: Run all pipeline tests on Intel Gaudi
run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_gpu_test_reports tests/pipelines -m "not not_device_test"
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: |
cat reports/${{ env.machine_type }}_run_pipelines_gpu_test_reports/failures_short.txt
cat reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_gpu_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_pipelines_gpu_test_reports
path: reports/${{ env.machine_type }}_run_pipelines_gpu_test_reports
name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
path: reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
@ -248,8 +245,8 @@ jobs:
name: ${{ env.machine_type }}_run_examples_gpu_test_reports
path: reports/${{ env.machine_type }}_run_examples_gpu_test_reports
run_deepspeed_gpu:
if: ${{ inputs.job == 'run_deepspeed_gpu' }}
run_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
name: Intel Gaudi deepspeed tests
strategy:
fail-fast: false
@ -305,20 +302,20 @@ jobs:
- name: Run all deepspeed tests on Intel Gaudi
run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_deepspeed_gpu_test_reports tests/deepspeed -m "not not_device_test"
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: |
cat reports/${{ env.machine_type }}_run_deepspeed_gpu_test_reports/failures_short.txt
cat reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_deepspeed_gpu_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_deepspeed_gpu_test_reports
path: reports/${{ env.machine_type }}_run_deepspeed_gpu_test_reports
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results:
name: Slack Report
@ -327,8 +324,8 @@ jobs:
setup,
run_models_gpu,
run_examples_gpu,
run_pipelines_gpu,
run_deepspeed_gpu,
run_torch_cuda_extensions_gpu,
run_pipelines_torch_gpu,
run_trainer_and_fsdp_gpu,
]
if: ${{ always() }}

View File

@ -23,7 +23,7 @@ jobs:
name: Pipeline CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_pipelines_gpu
job: run_pipelines_torch_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
@ -47,7 +47,7 @@ jobs:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
with:
job: run_deepspeed_gpu
job: run_torch_cuda_extensions_gpu
ci_event: Scheduled CI (Intel) - Gaudi3
runner_scale_set: itac-bm-emr-gaudi3-dell
slack_report_channel: "#transformers-ci-daily-intel-gaudi3"

View File

@ -44,7 +44,7 @@ tokens and decodes them back into audio.
from transformers import AutoProcessor, DiaForConditionalGeneration
torch_device = "cuda"
model_checkpoint = "buttercrab/dia-v1-1.6b"
model_checkpoint = "nari-labs/Dia-1.6B-0626"
text = ["[S1] Dia is an open weights text to dialogue model."]
processor = AutoProcessor.from_pretrained(model_checkpoint)
@ -66,7 +66,7 @@ from datasets import load_dataset, Audio
from transformers import AutoProcessor, DiaForConditionalGeneration
torch_device = "cuda"
model_checkpoint = "buttercrab/dia-v1-1.6b"
model_checkpoint = "nari-labs/Dia-1.6B-0626"
ds = load_dataset("hf-internal-testing/dailytalk-dummy", split="train")
ds = ds.cast_column("audio", Audio(sampling_rate=44100))
@ -93,7 +93,7 @@ from datasets import load_dataset, Audio
from transformers import AutoProcessor, DiaForConditionalGeneration
torch_device = "cuda"
model_checkpoint = "buttercrab/dia-v1-1.6b"
model_checkpoint = "nari-labs/Dia-1.6B-0626"
ds = load_dataset("hf-internal-testing/dailytalk-dummy", split="train")
ds = ds.cast_column("audio", Audio(sampling_rate=44100))

View File

@ -333,6 +333,11 @@ class ChatCommand(BaseTransformersCLICommand):
)
args.host, args.port = args.model_name_or_path_or_address.rsplit(":", 1)
if args.model_name_or_path is None:
raise ValueError(
"When connecting to a server, please specify a model name with the --model_name_or_path flag."
)
else:
self.spawn_backend = True
args.model_name_or_path = args.model_name_or_path_or_address

View File

@ -347,7 +347,7 @@ class ServeCommand(BaseTransformersCLICommand):
if not req.stream:
return {"error": "Only streaming mode is supported."}
update_model = req.model != self.loaded_model
update_model = self.canonicalized_model_name(req.model) != self.loaded_model
if update_model:
self.model, self.tokenizer = self.load_model_and_tokenizer(req.model, self.args)
@ -402,7 +402,7 @@ class ServeCommand(BaseTransformersCLICommand):
if self.last_messages is None:
req_continues_last_messages = False
# The new request has fewer rounds of conversation: this is a new request
elif len(self.last_messages) > len(req.messages):
elif len(self.last_messages) >= len(req.messages):
req_continues_last_messages = False
# Otherwise, check that the last messages are a subset of the new request
else:
@ -417,7 +417,7 @@ class ServeCommand(BaseTransformersCLICommand):
def generate(self, app):
@app.post("/v1/chat/completions")
def _serve(req: "ChatCompletionInput"):
update_model = req.model != self.loaded_model
update_model = self.canonicalized_model_name(req.model) != self.loaded_model
if update_model:
self.model, self.tokenizer = self.load_model_and_tokenizer(req.model, self.args)
@ -585,6 +585,11 @@ class ServeCommand(BaseTransformersCLICommand):
return quantization_config
def canonicalized_model_name(self, model_id: str) -> str:
if "@" in model_id:
return model_id
return f"{model_id}@main"
def load_model_and_tokenizer(
self, model_id_and_revision: str, args: ServeArguments
) -> tuple[PreTrainedModel, PreTrainedTokenizerFast]:
@ -621,9 +626,9 @@ class ServeCommand(BaseTransformersCLICommand):
if getattr(model, "hf_device_map", None) is None:
model = model.to(args.device)
self.loaded_model = model_id_and_revision
self.loaded_model = f"{model_id}@{revision}"
print("Loaded model", model_id_and_revision)
logger.warning(f"Loaded model {self.loaded_model}")
return model, tokenizer
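To make the revision handling above concrete, here is a small, self-contained sketch of the rule the serve command now applies before comparing a requested model against self.loaded_model. The standalone function mirrors the hunk for illustration; it is not the class method itself, and the model ids are placeholders.

# Illustrative standalone version of the canonicalization added to ServeCommand:
# a bare model id is pinned to the "main" revision, so a request for "org/model"
# matches a loaded "org/model@main" and does not trigger a needless reload.
def canonicalized_model_name(model_id: str) -> str:
    if "@" in model_id:
        return model_id
    return f"{model_id}@main"

assert canonicalized_model_name("org/model") == "org/model@main"
assert canonicalized_model_name("org/model@refs/pr/7") == "org/model@refs/pr/7"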

View File

@ -3773,16 +3773,28 @@ class GenerationMixin(ContinuousMixin):
Beam Search stopping condition -- halts the generation loop if any of these conditions becomes False
"""
# a. Can the open beams improve the top completed scores?
# early_stopping == False -> apply heuristic = always get the best score from
# `cur_len - decoder_prompt_len`. See the discussion below for more details.
# https://github.com/huggingface/transformers/pull/20901#issuecomment-1369845565
# early_stopping == False -> apply heuristic = always get the best score from `cur_len - decoder_prompt_len`.
# early_stopping == "never" -> compute the best score from `max_length` or `cur_len`, depending on the
# sign of `length_penalty`. Positive `length_penalty` favors longer sequences, thus we use
# `max_length` there.
# !!
# Be sure to check the docstring for `early_stopping` and `length_penalty`. The default parameterization
# does NOT correspond to a canonical beam search implementation, and tends to favor shorter output sequences
compared to it (the heuristic active by default underestimates the maximum achievable score, and thus cuts
# generation short). Also, be mindful that length penalty > 0.0 actually favors longer sequences, despite
# its name. These modifications were empirically found in the past (prior to 2022) to produce better quality
# generations, and changing them is BC breaking.
# For a canonical beam search implementation, set `early_stopping="never"` and `length_penalty=0.0`.
# See the discussion below for more details.
# https://github.com/huggingface/transformers/pull/20901#issuecomment-1369845565
# !!
if early_stopping == "never" and length_penalty > 0.0:
best_hypothetical_length = max_length - decoder_prompt_len
else:
best_hypothetical_length = cur_len - decoder_prompt_len
# best-case scenario: the next tokens have logprobs=0 (probability=1), and the score stays the same before
# applying length penalty
best_possible_running_score = running_beam_scores[:, :1] / (best_hypothetical_length**length_penalty)
worst_finished_score = torch.where(is_sent_finished, torch.min(beam_scores, dim=1, keepdim=True)[0], -1.0e9)
improvement_possible = torch.any(best_possible_running_score > worst_finished_score)
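A worked illustration of the heuristic discussed in the comment above, with invented numbers (not taken from the PR):

# Invented numbers, only to show why the default heuristic is more pessimistic than
# early_stopping="never" when length_penalty > 0.
running_score = -4.0                           # best open beam: sum of log-probs so far
cur_len, max_length, decoder_prompt_len = 6, 20, 2
length_penalty = 1.0

heuristic_best = running_score / ((cur_len - decoder_prompt_len) ** length_penalty)   # -1.0
never_best = running_score / ((max_length - decoder_prompt_len) ** length_penalty)    # about -0.22

# The heuristic estimate (-1.0) is lower, so open beams are declared "unable to improve"
# sooner, which is the shorter-output bias the comment warns about.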

View File

@ -4431,10 +4431,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMi
"`tp_plan` and `device_map` are mutually exclusive. Choose either one for parallelization."
)
# If torchrun was used, make sure to TP by default. This way people don't need to change tp or device map
if device_map == "auto" and tp_plan is None and int(os.environ.get("WORLD_SIZE", 0)):
tp_plan = "auto" # device_map = "auto" in torchrun equivalent to TP plan = AUTO!
device_map = None
if device_map == "auto" and int(os.environ.get("WORLD_SIZE", 0)):
logger.info(
"You've set device_map=`auto` while triggering a distributed run with torchrun. This might lead to unexpected behavior. "
"If your plan is to load the model on each device, you should set device_map={"
": PartialState().process_index} where PartialState comes from accelerate library"
)
# We need to correctly dispatch the model on the current process device. The easiest way for this is to use a simple
# `device_map` pointing to the correct device
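For reference, a hedged sketch of the workaround the new log message points to: under torchrun, explicitly place one full copy of the model on each rank instead of relying on device_map="auto" (the checkpoint name is a placeholder):

# Sketch of the device_map the warning recommends when each torchrun process
# should hold a full model replica on its own device.
from accelerate import PartialState
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2",          # placeholder checkpoint
    device_map={"": PartialState().process_index},   # "" = whole model on this rank's device
)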

View File

@ -1269,13 +1269,13 @@ class Glm4vModel(Glm4vPreTrainedModel):
if input_ids is None:
video_mask = inputs_embeds == self.get_input_embeddings()(
torch.tensor(self.config.video_token_id, dtype=torch.long, device=inputs_embeds.device)
torch.tensor(self.config.image_token_id, dtype=torch.long, device=inputs_embeds.device)
)
video_mask = video_mask.all(-1)
else:
video_mask = input_ids == self.config.video_token_id
video_mask = input_ids == self.config.image_token_id
n_video_tokens = (video_mask).sum()
n_video_tokens = video_mask.sum()
n_video_features = video_embeds.shape[0]
video_mask = video_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
if not is_torchdynamo_compiling() and n_video_tokens != n_video_features:

View File

@ -1269,13 +1269,13 @@ class Glm4vModel(Qwen2_5_VLModel):
if input_ids is None:
video_mask = inputs_embeds == self.get_input_embeddings()(
torch.tensor(self.config.video_token_id, dtype=torch.long, device=inputs_embeds.device)
torch.tensor(self.config.image_token_id, dtype=torch.long, device=inputs_embeds.device)
)
video_mask = video_mask.all(-1)
else:
video_mask = input_ids == self.config.video_token_id
video_mask = input_ids == self.config.image_token_id
n_video_tokens = (video_mask).sum()
n_video_tokens = video_mask.sum()
n_video_features = video_embeds.shape[0]
video_mask = video_mask.unsqueeze(-1).expand_as(inputs_embeds).to(inputs_embeds.device)
if not is_torchdynamo_compiling() and n_video_tokens != n_video_features:

View File

@ -2357,7 +2357,7 @@ class Trainer:
model = self.accelerator.prepare(self.model)
else:
if delay_optimizer_creation:
self.optimizer = self.accelerator.prepare(self.optimizer)
model = self.accelerator.prepare(self.model)
else:
model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
else:

View File

@ -865,50 +865,59 @@ def is_torch_hpu_available():
if not hasattr(torch, "hpu") or not torch.hpu.is_available():
return False
import habana_frameworks.torch.utils.experimental as htexp # noqa: F401
# IlyasMoutawwakil: We patch masked_fill_ for int64 tensors to avoid a bug on Gaudi1
# synNodeCreateWithId failed for node: masked_fill_fwd_i64 with synStatus 26 [Generic failure]
# This can be removed once Gaudi1 support is discontinued but for now we need it to keep using
# dl1.24xlarge Gaudi1 instances on AWS for testing.
# check if the device is Gaudi1 (vs Gaudi2, Gaudi3).
if htexp._get_device_type() == htexp.synDeviceType.synDeviceGaudi:
original_masked_fill_ = torch.Tensor.masked_fill_
def patched_masked_fill_(self, mask, value):
if self.dtype == torch.int64:
logger.warning_once(
"In-place tensor.masked_fill_(mask, value) is not supported for int64 tensors on Gaudi1. "
"This operation will be performed out-of-place using tensor[mask] = value."
)
self[mask] = value
else:
original_masked_fill_(self, mask, value)
torch.Tensor.masked_fill_ = patched_masked_fill_
# We patch torch.gather for int64 tensors to avoid a bug on Gaudi
# Graph compile failed with synStatus 26 [Generic failure]
# This can be removed once bug is fixed but for now we need it.
original_gather = torch.Tensor.gather
original_gather = torch.gather
def patched_gather(input: torch.Tensor, dim: int, index: torch.LongTensor) -> torch.Tensor:
if input.dtype == torch.int64 and input.device.type == "hpu":
logger.warning_once(
"torch.gather is not supported for int64 tensors on Gaudi. "
"This operation will be performed patched_gather using indexing."
)
idx = [torch.arange(size, device=input.device, dtype=input.dtype) for size in input.shape]
idx[dim] = index
idx = tuple(idx)
output = input[idx]
return output
return original_gather(input.to(torch.int32), dim, index).to(torch.int64)
else:
return original_gather(input, dim, index)
torch.gather = patched_gather
torch.Tensor.gather = patched_gather
original_take_along_dim = torch.take_along_dim
def patched_take_along_dim(
input: torch.Tensor, indices: torch.LongTensor, dim: Optional[int] = None
) -> torch.Tensor:
if input.dtype == torch.int64 and input.device.type == "hpu":
return original_take_along_dim(input.to(torch.int32), indices, dim).to(torch.int64)
else:
return original_take_along_dim(input, indices, dim)
torch.take_along_dim = patched_take_along_dim
original_cholesky = torch.linalg.cholesky
def safe_cholesky(A, *args, **kwargs):
output = original_cholesky(A, *args, **kwargs)
if torch.isnan(output).any():
jitter_value = 1e-9
diag_jitter = torch.eye(A.size(-1), dtype=A.dtype, device=A.device) * jitter_value
output = original_cholesky(A + diag_jitter, *args, **kwargs)
return output
torch.linalg.cholesky = safe_cholesky
original_scatter = torch.scatter
def patched_scatter(
input: torch.Tensor, dim: int, index: torch.Tensor, src: torch.Tensor, *args, **kwargs
) -> torch.Tensor:
if input.device.type == "hpu" and input is src:
return original_scatter(input, dim, index, src.clone(), *args, **kwargs)
else:
return original_scatter(input, dim, index, src, *args, **kwargs)
torch.scatter = patched_scatter
torch.Tensor.scatter = patched_scatter
# IlyasMoutawwakil: we patch torch.compile to use the HPU backend by default
# https://github.com/huggingface/transformers/pull/38790#discussion_r2157043944
# This is necessary for cases where torch.compile is used as a decorator (defaulting to inductor)
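The hunk is cut off before the torch.compile patch itself; as a rough sketch of the idea the comment describes (the backend name and wrapper below are assumptions, not code quoted from the PR):

# Assumed sketch only: make torch.compile default to the HPU backend so that
# decorator-style usage does not silently fall back to inductor.
import torch

original_compile = torch.compile

def hpu_default_compile(*args, **kwargs):
    kwargs.setdefault("backend", "hpu_backend")  # backend name from Intel Gaudi docs; assumption here
    return original_compile(*args, **kwargs)

torch.compile = hpu_default_compile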

View File

@ -29,12 +29,34 @@ class ChatCLITest(unittest.TestCase):
self.assertIn("chat interface", cs.out.lower())
@patch.object(ChatCommand, "run")
def test_cli_dispatch(self, run_mock):
def test_cli_dispatch_model(self, run_mock):
"""
Running transformers chat with just a model should work & spawn a serve underneath
"""
args = ["transformers", "chat", "hf-internal-testing/tiny-random-gpt2"]
with patch("sys.argv", args):
cli.main()
run_mock.assert_called_once()
def test_cli_dispatch_url(self):
"""
Running transformers chat with just a URL should not work as a model should additionally be specified
"""
args = ["transformers", "chat", "localhost:8000"]
with self.assertRaises(ValueError):
with patch("sys.argv", args):
cli.main()
@patch.object(ChatCommand, "run")
def test_cli_dispatch_url_and_model(self, run_mock):
"""
Running transformers chat with a URL and a model should work
"""
args = ["transformers", "chat", "localhost:8000", "--model_name_or_path=hf-internal-testing/tiny-random-gpt2"]
with patch("sys.argv", args):
cli.main()
run_mock.assert_called_once()
def test_parsed_args(self):
with (
patch.object(ChatCommand, "__init__", return_value=None) as init_mock,

View File

@ -462,6 +462,9 @@ class AlignModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_config(self):
self.config_tester.run_common_tests()
def test_batching_equivalence(self, atol=3e-4, rtol=3e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="Start to fail after using torch `cu118`.")
def test_multi_gpu_data_parallel_forward(self):
super().test_multi_gpu_data_parallel_forward()

View File

@ -25,6 +25,7 @@ from transformers import (
AriaTextConfig,
AutoProcessor,
AutoTokenizer,
BitsAndBytesConfig,
is_torch_available,
is_vision_available,
)
@ -52,6 +53,9 @@ if is_torch_available():
if is_vision_available():
from PIL import Image
# Used to be https://aria-vl.github.io/static/images/view.jpg, but it was removed; llava-vl hosts the same image
IMAGE_OF_VIEW_URL = "https://llava-vl.github.io/static/images/view.jpg"
class AriaVisionText2TextModelTester:
def __init__(
@ -262,23 +266,38 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
@require_bitsandbytes
def test_small_model_integration_test(self):
# Let's make sure we test the preprocessing to replace what is used
model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
model = AriaForConditionalGeneration.from_pretrained(
"rhymes-ai/Aria",
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
prompt = "<image>\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT:"
image_file = "https://aria-vl.github.io/static/images/view.jpg"
raw_image = Image.open(requests.get(image_file, stream=True).raw)
inputs = self.processor(images=raw_image, text=prompt, return_tensors="pt")
prompt = "<|img|>\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT:"
raw_image = Image.open(requests.get(IMAGE_OF_VIEW_URL, stream=True).raw)
inputs = self.processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device, model.dtype)
EXPECTED_INPUT_IDS = torch.tensor([[1, 32000, 28705, 13, 11123, 28747, 1824, 460, 272, 1722,315, 1023, 347, 13831, 925, 684, 739, 315, 3251, 456,1633, 28804, 13, 4816, 8048, 12738, 28747]]) # fmt: skip
non_img_tokens = [
109, 3905, 2000, 93415, 4551, 1162, 901, 3894, 970, 2478, 1017, 19312, 2388, 1596, 1809, 970, 5449, 1235,
3333, 93483, 109, 61081, 11984, 14800, 93415
] # fmt: skip
EXPECTED_INPUT_IDS = torch.tensor([[9] * 256 + non_img_tokens]).to(inputs["input_ids"].device)
self.assertTrue(torch.equal(inputs["input_ids"], EXPECTED_INPUT_IDS))
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_DECODED_TEXT = "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT: When visiting this place, there are a few things one should be cautious about. Firstly," # fmt: skip
decoded_output = self.processor.decode(output[0], skip_special_tokens=True)
self.assertEqual(
self.processor.decode(output[0], skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
)
expected_output = Expectations(
{
(
"cuda",
None,
): "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT: When visiting this place, there are a few things one should be cautious about. Firstly,",
(
"rocm",
(9, 5),
): "\n USER: What are the things I should be cautious about when I visit this place?\n ASSISTANT: When you visit this place, you should be cautious about the following things:\n\n- The",
}
).get_expectation()
self.assertEqual(decoded_output, expected_output)
@slow
@require_torch_large_accelerator
@ -287,20 +306,29 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
# Let's make sure we test the preprocessing to replace what is used
model_id = "rhymes-ai/Aria"
model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
model = AriaForConditionalGeneration.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
processor = AutoProcessor.from_pretrained(model_id)
prompt = "USER: <image>\nWhat are the things I should be cautious about when I visit this place? ASSISTANT:"
image_file = "https://aria-vl.github.io/static/images/view.jpg"
raw_image = Image.open(requests.get(image_file, stream=True).raw)
inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(torch_device, torch.float16)
prompt = "USER: <|img|>\nWhat are the things I should be cautious about when I visit this place? ASSISTANT:"
raw_image = Image.open(requests.get(IMAGE_OF_VIEW_URL, stream=True).raw)
inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device, model.dtype)
output = model.generate(**inputs, max_new_tokens=900, do_sample=False)
EXPECTED_DECODED_TEXT = "USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Finally, be respectful of the environment and other visitors, and follow any posted rules or guidelines for the area." # fmt: skip
output = model.generate(**inputs, max_new_tokens=90, do_sample=False)
EXPECTED_DECODED_TEXT = Expectations(
{
("cuda", (8, 0)): "USER: \n What are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this beautiful location, it's important to be mindful of a few things to ensure both your safety and the preservation of the environment. Firstly, always be cautious when walking on the wooden pier, as it can be slippery, especially during or after rain. Secondly, be aware of the local wildlife and do not feed or disturb them. Lastly, respect the natural surroundings by not littering and sticking to",
("rocm", (9, 5)): "USER: \n What are the things I should be cautious about when I visit this place? ASSISTANT: \n\nWhen visiting this place, you should be cautious about the following:\n\n1. **Weather Conditions**: The weather can be unpredictable, so it's important to check the forecast and dress in layers. Sudden changes in weather can occur, so be prepared for rain or cold temperatures.\n\n2. **Safety on the Dock**: The dock may be slippery, especially when",
}
).get_expectation() # fmt: off
decoded_output = processor.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
self.assertEqual(
processor.decode(output[0], skip_special_tokens=True),
decoded_output,
EXPECTED_DECODED_TEXT,
f"Expected: {repr(EXPECTED_DECODED_TEXT)}\nActual: {repr(decoded_output)}",
)
@slow
@ -310,53 +338,77 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
# Let's make sure we test the preprocessing to replace what is used
model_id = "rhymes-ai/Aria"
model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
model = AriaForConditionalGeneration.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
processor = AutoProcessor.from_pretrained(model_id)
prompts = [
"USER: <image>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me? ASSISTANT:",
"USER: <image>\nWhat is this? ASSISTANT:",
"USER: <|img|>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me? ASSISTANT:",
"USER: <|img|>\nWhat is this? ASSISTANT:",
]
image1 = Image.open(requests.get("https://aria-vl.github.io/static/images/view.jpg", stream=True).raw)
image1 = Image.open(requests.get(IMAGE_OF_VIEW_URL, stream=True).raw)
image2 = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
inputs = processor(images=[image1, image2], text=prompts, return_tensors="pt", padding=True)
inputs = processor(images=[image1, image2], text=prompts, return_tensors="pt", padding=True).to(
model.device, model.dtype
)
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_DECODED_TEXT = ['USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, you', 'USER: \nWhat is this? ASSISTANT: The image features two cats lying down on a pink couch. One cat is located on'] # fmt: skip
EXPECTED_DECODED_TEXT = Expectations(
{
("cuda", None): [
"USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, you",
"USER: \nWhat is this? ASSISTANT: The image features two cats lying down on a pink couch. One cat is located on",
],
("rocm", (9, 5)): [
"USER: \n What are the things I should be cautious about when I visit this place? What should I bring with me? ASSISTANT: \n\nWhen visiting this place, you should be cautious about the weather conditions, as it",
"USER: \n What is this? ASSISTANT: This is a picture of two cats sleeping on a couch. USER: What is the color of",
],
}
).get_expectation()
self.assertEqual(
processor.batch_decode(output, skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
)
decoded_output = processor.batch_decode(output, skip_special_tokens=True)
self.assertEqual(decoded_output, EXPECTED_DECODED_TEXT)
@slow
@require_torch_large_accelerator
@require_bitsandbytes
def test_small_model_integration_test_batch(self):
# Let's make sure we test the preprocessing to replace what is used
model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
model = AriaForConditionalGeneration.from_pretrained(
"rhymes-ai/Aria",
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
# The first batch is longer in terms of text, but only has 1 image. The second batch will be padded in text, but the first will be padded because images take more space!
prompts = [
"USER: <image>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT:",
"USER: <image>\nWhat is this?\nASSISTANT:",
"USER: <|img|>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT:",
"USER: <|img|>\nWhat is this?\nASSISTANT:",
]
image1 = Image.open(requests.get("https://aria-vl.github.io/static/images/view.jpg", stream=True).raw)
image1 = Image.open(requests.get(IMAGE_OF_VIEW_URL, stream=True).raw)
image2 = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
inputs = self.processor(images=[image1, image2], text=prompts, return_tensors="pt", padding=True)
inputs = self.processor(images=[image1, image2], text=prompts, return_tensors="pt", padding=True).to(
model.device, model.dtype
)
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_DECODED_TEXT = [
'USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, there are a few things to be cautious about and items to bring.',
'USER: \nWhat is this?\nASSISTANT: Cats'
] # fmt: skip
self.assertEqual(
self.processor.batch_decode(output, skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
)
EXPECTED_DECODED_TEXT = Expectations({
("cuda", None): [
'USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, there are a few things to be cautious about and items to bring.',
'USER: \nWhat is this?\nASSISTANT: Cats',
],
("rocm", (9, 5)): [
'USER: \n What are the things I should be cautious about when I visit this place? What should I bring with me?\n ASSISTANT: \n\nWhen visiting this place, you should be cautious about the following:\n\n-',
'USER: \n What is this?\n ASSISTANT: This is a picture of two cats sleeping on a couch. The couch is red, and the cats',
],
}).get_expectation() # fmt: skip
decoded_output = self.processor.batch_decode(output, skip_special_tokens=True)
self.assertEqual(decoded_output, EXPECTED_DECODED_TEXT)
@slow
@require_torch_large_accelerator
@ -366,26 +418,31 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
model_id = "rhymes-ai/Aria"
# Multi-image & multi-prompt (e.g. 3 images and 2 prompts now fails with SDPA, this tests if "eager" works as before)
model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True, attn_implementation="eager")
model = AriaForConditionalGeneration.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
processor = AutoProcessor.from_pretrained(model_id, pad_token="<pad>")
prompts = [
"USER: <image>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT:",
"USER: <image>\nWhat is this?\nASSISTANT: Two cats lying on a bed!\nUSER: <image>\nAnd this?\nASSISTANT:",
"USER: <|img|>\nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT:",
"USER: <|img|>\nWhat is this?\nASSISTANT: Two cats lying on a bed!\nUSER: <|img|>\nAnd this?\nASSISTANT:",
]
image1 = Image.open(requests.get("https://aria-vl.github.io/static/images/view.jpg", stream=True).raw)
image1 = Image.open(requests.get(IMAGE_OF_VIEW_URL, stream=True).raw)
image2 = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
inputs = processor(images=[image1, image2, image1], text=prompts, return_tensors="pt", padding=True)
inputs = inputs.to(model.device, model.dtype)
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_DECODED_TEXT = ['USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, which appears to be a dock or pier extending over a body of water', 'USER: \nWhat is this?\nASSISTANT: Two cats lying on a bed!\nUSER: \nAnd this?\nASSISTANT: A cat sleeping on a bed.'] # fmt: skip
EXPECTED_DECODED_TEXT = Expectations({
("cuda", None): ['USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, which appears to be a dock or pier extending over a body of water', 'USER: \nWhat is this?\nASSISTANT: Two cats lying on a bed!\nUSER: \nAnd this?\nASSISTANT: A cat sleeping on a bed.'],
("rocm", (9, 5)): ['USER: \n What are the things I should be cautious about when I visit this place? What should I bring with me?\n ASSISTANT: \n\nWhen visiting this place, you should be cautious about the weather conditions, as it', 'USER: \n What is this?\n ASSISTANT: Two cats lying on a bed!\n USER: \n And this?\n ASSISTANT: A serene lake scene with a wooden dock extending into the water.\n USER: \n']
}).get_expectation() # fmt: skip
self.assertEqual(
processor.batch_decode(output, skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
)
decoded_output = processor.batch_decode(output, skip_special_tokens=True)
self.assertEqual(decoded_output, EXPECTED_DECODED_TEXT)
@slow
@require_torch_large_accelerator
@ -395,7 +452,8 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
# Skip multihead_attn for 4bit because MHA will read the original weight without dequantize.
# See https://github.com/huggingface/transformers/pull/37444#discussion_r2045852538.
model = AriaForConditionalGeneration.from_pretrained(
"rhymes-ai/Aria", load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]
"rhymes-ai/Aria",
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
@ -447,6 +505,10 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
"<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
"<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
],
("rocm", (9, 5)): [
"<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image shows a cute golden retriever puppy sitting on a paved surface with a stick",
'<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young llama standing on a patch of ground with some dry grass and dirt. The'
],
}
) # fmt: skip
EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()
@ -480,9 +542,12 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
@require_bitsandbytes
def test_generation_no_images(self):
model_id = "rhymes-ai/Aria"
model = AriaForConditionalGeneration.from_pretrained(model_id, load_in_4bit=True)
model = AriaForConditionalGeneration.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
processor = AutoProcessor.from_pretrained(model_id)
assert model.device.type == "cuda", "This test is only supported on CUDA" # TODO: remove this
# Prepare inputs with no images
inputs = processor(text="Hello, I am", return_tensors="pt").to(torch_device)
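Many of the test updates in this commit (Aria above; DPT, FocalNet, LeViT, Mask2Former and LlavaNextVideo below) replace single hard-coded expected values with the Expectations helper from transformers.testing_utils: a mapping from (device_type, version) keys to expected outputs whose get_expectation() returns the entry matching the current accelerator, falling back to the generic (None, None) entry. A simplified, self-contained sketch of that lookup (the matching logic below is an assumption for illustration, not the library code):

# Simplified stand-in for transformers.testing_utils.Expectations; the real helper
# has richer matching rules, this only illustrates the device-keyed fallback.
class SimpleExpectations(dict):
    def get_expectation(self, device_type=None, version=None):
        for key in ((device_type, version), (device_type, None), (None, None)):
            if key in self:
                return self[key]
        raise KeyError("no expectation defined for this device")

expected = SimpleExpectations({
    (None, None): [0.2166, -0.4368, 0.2191],  # generic reference slice (values from the FocalNet hunk)
    ("cuda", 8): [0.2168, -0.4367, 0.2190],   # values observed on compute-capability-8 GPUs
})
print(expected.get_expectation("cuda", 8))    # -> [0.2168, -0.4367, 0.2190]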

View File

@ -18,7 +18,7 @@ import unittest
from transformers import DPTConfig
from transformers.file_utils import is_torch_available, is_vision_available
from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
@ -342,11 +342,15 @@ class DPTModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 384, 384))
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[6.3199, 6.3629, 6.4148], [6.3850, 6.3615, 6.4166], [6.3519, 6.3176, 6.3575]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [[6.3199, 6.3629, 6.4148], [6.3850, 6.3615, 6.4166], [6.3519, 6.3176, 6.3575]],
("cuda", 8): [[6.3215, 6.3635, 6.4155], [6.3863, 6.3622, 6.4174], [6.3530, 6.3184, 6.3583]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
def test_inference_semantic_segmentation(self):
image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")

View File

@ -17,7 +17,7 @@ import unittest
from transformers import Dinov2Config, DPTConfig
from transformers.file_utils import is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils.import_utils import get_torch_major_and_minor_version
from ...test_configuration_common import ConfigTester
@ -267,11 +267,15 @@ class DPTModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 576, 736))
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[6.0336, 7.1502, 7.4130], [6.8977, 7.2383, 7.2268], [7.9180, 8.0525, 8.0134]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [[6.0336, 7.1502, 7.4130], [6.8977, 7.2383, 7.2268], [7.9180, 8.0525, 8.0134]],
("cuda", 8): [[6.0350, 7.1518, 7.4144], [6.8992, 7.2396, 7.2280], [7.9194, 8.0538, 8.0145]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
def test_inference_depth_estimation_beit(self):
image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-beit-base-384")
@ -289,11 +293,23 @@ class DPTModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 384, 384))
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[2669.7061, 2663.7144, 2674.9399], [2633.9326, 2650.9092, 2665.4270], [2621.8271, 2632.0129, 2637.2290]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[2669.7061, 2663.7144, 2674.9399],
[2633.9326, 2650.9092, 2665.4270],
[2621.8271, 2632.0129, 2637.2290],
],
("cuda", 8): [
[2669.4292, 2663.4121, 2674.6233],
[2633.7400, 2650.7026, 2665.2085],
[2621.6572, 2631.8452, 2637.0525],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
def test_inference_depth_estimation_swinv2(self):
image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
@ -311,8 +327,20 @@ class DPTModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 256, 256))
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[1032.7719, 1025.1886, 1030.2661], [1023.7619, 1021.0075, 1024.9121], [1022.5667, 1018.8522, 1021.4145]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[1032.7719, 1025.1886, 1030.2661],
[1023.7619, 1021.0075, 1024.9121],
[1022.5667, 1018.8522, 1021.4145],
],
("cuda", 8): [
[1032.7170, 1025.0629, 1030.1941],
[1023.7309, 1020.9786, 1024.8594],
[1022.5233, 1018.8235, 1021.3312],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.predicted_depth[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -194,6 +194,9 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_config(self):
self.config_tester.run_common_tests()
def test_batching_equivalence(self, atol=2e-5, rtol=2e-5):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="DPT does not use inputs_embeds")
def test_inputs_embeds(self):
pass

View File

@ -24,7 +24,14 @@ from transformers import (
FastSpeech2ConformerWithHifiGanConfig,
is_torch_available,
)
from transformers.testing_utils import require_g2p_en, require_torch, require_torch_accelerator, slow, torch_device
from transformers.testing_utils import (
Expectations,
require_g2p_en,
require_torch,
require_torch_accelerator,
slow,
torch_device,
)
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_tensor
@ -373,24 +380,38 @@ class FastSpeech2ConformerModelIntegrationTest(unittest.TestCase):
# mel-spectrogram is too large (1, 205, 80), so only check top-left 100 elements
# fmt: off
expected_mel_spectrogram = torch.tensor(
[
[-1.2426, -1.7286, -1.6754, -1.7451, -1.6402, -1.5219, -1.4480, -1.3345, -1.4031, -1.4497],
[-0.7858, -1.4966, -1.3602, -1.4876, -1.2949, -1.0723, -1.0021, -0.7553, -0.6521, -0.6929],
[-0.7298, -1.3908, -1.0369, -1.2656, -1.0342, -0.7883, -0.7420, -0.5249, -0.3734, -0.3977],
[-0.4784, -1.3508, -1.1558, -1.4678, -1.2820, -1.0252, -1.0868, -0.9006, -0.8947, -0.8448],
[-0.3963, -1.2895, -1.2813, -1.6147, -1.4658, -1.2560, -1.4134, -1.2650, -1.3255, -1.1715],
[-1.4914, -1.3097, -0.3821, -0.3898, -0.5748, -0.9040, -1.0755, -1.0575, -1.2205, -1.0572],
[0.0197, -0.0582, 0.9147, 1.1512, 1.1651, 0.6628, -0.1010, -0.3085, -0.2285, 0.2650],
[1.1780, 0.1803, 0.7251, 1.5728, 1.6678, 0.4542, -0.1572, -0.1787, 0.0744, 0.8168],
[-0.2078, -0.3211, 1.1096, 1.5085, 1.4632, 0.6299, -0.0515, 0.0589, 0.8609, 1.4429],
[0.7831, -0.2663, 1.0352, 1.4489, 0.9088, 0.0247, -0.3995, 0.0078, 1.2446, 1.6998],
],
device=torch_device,
expectations = Expectations(
{
(None, None): [
[-1.2426, -1.7286, -1.6754, -1.7451, -1.6402, -1.5219, -1.4480, -1.3345, -1.4031, -1.4497],
[-0.7858, -1.4966, -1.3602, -1.4876, -1.2949, -1.0723, -1.0021, -0.7553, -0.6521, -0.6929],
[-0.7298, -1.3908, -1.0369, -1.2656, -1.0342, -0.7883, -0.7420, -0.5249, -0.3734, -0.3977],
[-0.4784, -1.3508, -1.1558, -1.4678, -1.2820, -1.0252, -1.0868, -0.9006, -0.8947, -0.8448],
[-0.3963, -1.2895, -1.2813, -1.6147, -1.4658, -1.2560, -1.4134, -1.2650, -1.3255, -1.1715],
[-1.4914, -1.3097, -0.3821, -0.3898, -0.5748, -0.9040, -1.0755, -1.0575, -1.2205, -1.0572],
[0.0197, -0.0582, 0.9147, 1.1512, 1.1651, 0.6628, -0.1010, -0.3085, -0.2285, 0.2650],
[1.1780, 0.1803, 0.7251, 1.5728, 1.6678, 0.4542, -0.1572, -0.1787, 0.0744, 0.8168],
[-0.2078, -0.3211, 1.1096, 1.5085, 1.4632, 0.6299, -0.0515, 0.0589, 0.8609, 1.4429],
[0.7831, -0.2663, 1.0352, 1.4489, 0.9088, 0.0247, -0.3995, 0.0078, 1.2446, 1.6998],
],
("cuda", 8): [
[-1.2425, -1.7282, -1.6750, -1.7448, -1.6400, -1.5217, -1.4478, -1.3341, -1.4026, -1.4493],
[-0.7858, -1.4967, -1.3601, -1.4875, -1.2950, -1.0725, -1.0021, -0.7553, -0.6522, -0.6929],
[-0.7303, -1.3911, -1.0370, -1.2656, -1.0345, -0.7888, -0.7423, -0.5251, -0.3737, -0.3979],
[-0.4784, -1.3506, -1.1556, -1.4677, -1.2820, -1.0253, -1.0868, -0.9006, -0.8949, -0.8448],
[-0.3968, -1.2896, -1.2811, -1.6145, -1.4660, -1.2564, -1.4135, -1.2652, -1.3258, -1.1716],
[-1.4912, -1.3092, -0.3812, -0.3886, -0.5737, -0.9034, -1.0749, -1.0571, -1.2202, -1.0567],
[0.0200, -0.0577, 0.9151, 1.1516, 1.1656, 0.6628, -0.1012, -0.3086, -0.2283, 0.2658],
[1.1778, 0.1805, 0.7255, 1.5732, 1.6680, 0.4539, -0.1572, -0.1785, 0.0751, 0.8175],
[-0.2088, -0.3212, 1.1101, 1.5085, 1.4625, 0.6293, -0.0522, 0.0587, 0.8615, 1.4432],
[0.7834, -0.2659, 1.0355, 1.4486, 0.9080, 0.0244, -0.3995, 0.0083, 1.2452, 1.6998],
],
}
)
expected_mel_spectrogram = torch.tensor(expectations.get_expectation()).to(torch_device)
# fmt: on
torch.testing.assert_close(spectrogram[0, :10, :10], expected_mel_spectrogram, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(spectrogram[0, :10, :10], expected_mel_spectrogram, rtol=2e-4, atol=2e-4)
self.assertEqual(spectrogram.shape, (1, 205, model.config.num_mel_bins))
def test_training_integration(self):

View File

@ -17,7 +17,7 @@ import collections
import unittest
from transformers import FocalNetConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_backbone_common import BackboneTesterMixin
@ -425,8 +425,16 @@ class FocalNetModelIntegrationTest(unittest.TestCase):
# verify the logits
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([0.2166, -0.4368, 0.2191]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [0.2166, -0.4368, 0.2191],
("cuda", 8): [0.2168, -0.4367, 0.2190],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
self.assertEqual(outputs.logits.argmax(dim=-1).item(), 281)

View File

@ -164,6 +164,9 @@ class GLPNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_batching_equivalence(self, atol=3e-4, rtol=3e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
def test_for_depth_estimation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_depth_estimation(*config_and_inputs)

View File

@ -681,25 +681,48 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
expected_shape_logits = torch.Size((1, model.config.num_queries, model.config.d_model))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_boxes = torch.tensor(
[[0.7674, 0.4136, 0.4572], [0.2566, 0.5463, 0.4760], [0.2585, 0.5442, 0.4641]]
).to(torch_device)
expected_logits = torch.tensor(
[[-4.8913, -0.1900, -0.2161], [-4.9653, -0.3719, -0.3950], [-5.9599, -3.3765, -3.3104]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [[0.7674, 0.4136, 0.4572], [0.2566, 0.5463, 0.4760], [0.2585, 0.5442, 0.4641]],
("cuda", 8): [[0.7674, 0.4135, 0.4571], [0.2566, 0.5463, 0.4760], [0.2585, 0.5442, 0.4640]],
}
)
expected_boxes = torch.tensor(expectations.get_expectation()).to(torch_device)
expectations = Expectations(
{
(None, None): [[-4.8913, -0.1900, -0.2161], [-4.9653, -0.3719, -0.3950], [-5.9599, -3.3765, -3.3104]],
("cuda", 8): [[-4.8927, -0.1910, -0.2169], [-4.9657, -0.3748, -0.3980], [-5.9579, -3.3812, -3.3153]],
}
)
expected_logits = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=1e-3, atol=1e-3)
expected_shape_boxes = torch.Size((1, model.config.num_queries, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=2e-4, atol=2e-4)
# verify postprocessing
results = processor.image_processor.post_process_object_detection(
outputs, threshold=0.35, target_sizes=[(image.height, image.width)]
)[0]
expected_scores = torch.tensor([0.4526, 0.4082]).to(torch_device)
expected_slice_boxes = torch.tensor([344.8143, 23.1796, 637.4004, 373.8295]).to(torch_device)
expectations = Expectations(
{
(None, None): [[0.4526, 0.4082]],
("cuda", 8): [0.4524, 0.4074],
}
)
expected_scores = torch.tensor(expectations.get_expectation()).to(torch_device)
expectations = Expectations(
{
(None, None): [344.8143, 23.1796, 637.4004, 373.8295],
("cuda", 8): [344.8210, 23.1831, 637.3943, 373.8227],
}
)
expected_slice_boxes = torch.tensor(expectations.get_expectation()).to(torch_device)
self.assertEqual(len(results["scores"]), 2)
torch.testing.assert_close(results["scores"], expected_scores, rtol=1e-3, atol=1e-3)

View File

@ -262,6 +262,9 @@ class HieraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
self.config_tester.check_config_can_be_init_without_params()
self.config_tester.check_config_arguments_init()
def test_batching_equivalence(self, atol=3e-4, rtol=3e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
# Overriding as Hiera `get_input_embeddings` returns HieraPatchEmbeddings
def test_model_get_set_embeddings(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@ -19,7 +19,7 @@ from math import ceil, floor
from transformers import LevitConfig
from transformers.file_utils import cached_property, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
@ -406,6 +406,11 @@ class LevitModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([1.0448, -0.3745, -1.8317]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [1.0448, -0.3745, -1.8317],
("cuda", 8): [1.0453, -0.3739, -1.8314],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -17,7 +17,7 @@ import unittest
from datasets import load_dataset
from transformers.models.lightglue.configuration_lightglue import LightGlueConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import get_device_properties, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -143,6 +143,13 @@ class LightGlueModelTest(ModelTesterMixin, unittest.TestCase):
self.config_tester.check_config_can_be_init_without_params()
self.config_tester.check_config_arguments_init()
def test_batching_equivalence(self, atol=1e-5, rtol=1e-5):
device_properties = get_device_properties()
if device_properties[0] == "cuda" and device_properties[1] == 8:
# TODO: (ydshieh) fix this
self.skipTest(reason="After switching to A10, this test always fails, but pass on CPU or T4.")
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="LightGlueForKeypointMatching does not use inputs_embeds")
def test_inputs_embeds(self):
pass

View File

@ -29,6 +29,7 @@ from transformers import (
is_vision_available,
)
from transformers.testing_utils import (
Expectations,
cleanup,
require_bitsandbytes,
require_torch,
@ -378,12 +379,16 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):
# verify generation
output = model.generate(**inputs, do_sample=False, max_new_tokens=40)
EXPECTED_DECODED_TEXT = (
"USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems", # cuda output
"USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while wearing a pair of glasses that are too large for them. The glasses are", # xpu output
)
expected_decoded_text = Expectations(
{
("cuda", None): "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems",
("xpu", None): "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while wearing a pair of glasses that are too large for them. The glasses are",
("rocm", (9, 5)): "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and adorable behavior of the young child. The child is seen reading a book, but instead of turning the pages like one would typically do, they",
}
).get_expectation() # fmt: off
self.assertTrue(self.processor.decode(output[0], skip_special_tokens=True) in EXPECTED_DECODED_TEXT)
decoded_text = self.processor.decode(output[0], skip_special_tokens=True)
self.assertEqual(decoded_text, expected_decoded_text)
@slow
@require_bitsandbytes
@ -400,15 +405,17 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):
).to(torch_device)
output = model.generate(**inputs, do_sample=False, max_new_tokens=20)
decoded_text = self.processor.batch_decode(output, skip_special_tokens=True)
EXPECTED_DECODED_TEXT = [
'USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a',
'USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a'
] # fmt: skip
self.assertEqual(
self.processor.batch_decode(output, skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
)
expected_decoded_text = Expectations(
{
("cuda", None): "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a",
("rocm", (9, 5)): "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and adorable behavior of the young child. The",
}
).get_expectation() # fmt: off
EXPECTED_DECODED_TEXT = [expected_decoded_text, expected_decoded_text]
self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT)
@slow
@require_bitsandbytes
@ -435,8 +442,15 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):
# verify generation
output = model.generate(**inputs, do_sample=False, max_new_tokens=50)
EXPECTED_DECODED_TEXT = 'USER: \nWhat is shown in this image? ASSISTANT: The image appears to be a graphical representation of a machine learning model\'s performance on a task, likely related to natural language processing or text understanding. It shows a scatter plot with two axes, one labeled "BLIP-2"' # fmt: skip
self.assertEqual(self.processor.decode(output[0], skip_special_tokens=True), EXPECTED_DECODED_TEXT)
EXPECTED_DECODED_TEXT = Expectations(
{
("rocm", (9, 5)): "USER: \nWhat is shown in this image? ASSISTANT: The image displays a chart that appears to be a comparison of different models or versions of a machine learning (ML) model, likely a neural network, based on their performance on a task or dataset. The chart is a scatter plot with axes labeled",
("cuda", None): 'USER: \nWhat is shown in this image? ASSISTANT: The image appears to be a graphical representation of a machine learning model\'s performance on a task, likely related to natural language processing or text understanding. It shows a scatter plot with two axes, one labeled "BLIP-2"',
}
).get_expectation() # fmt: off
decoded_text = self.processor.decode(output[0], skip_special_tokens=True)
self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT)
@slow
@require_bitsandbytes

View File

@ -21,6 +21,7 @@ from tests.test_modeling_common import floats_tensor
from transformers import AutoModelForImageClassification, Mask2FormerConfig, is_torch_available, is_vision_available
from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
from transformers.testing_utils import (
Expectations,
require_timm,
require_torch,
require_torch_accelerator,
@ -403,7 +404,7 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
)
TOLERANCE = 1e-4
TOLERANCE = 2e-4
# We will verify our results on an image of cute cats
@ -438,31 +439,52 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
outputs = model(**inputs)
expected_slice_hidden_state = torch.tensor(
[[-0.2790, -1.0717, -1.1668], [-0.5128, -0.3128, -0.4987], [-0.5832, 0.1971, -0.0197]]
[
[-0.2790, -1.0717, -1.1668],
[-0.5128, -0.3128, -0.4987],
[-0.5832, 0.1971, -0.0197],
]
).to(torch_device)
self.assertTrue(
torch.allclose(
outputs.encoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE
)
torch.testing.assert_close(
outputs.encoder_last_hidden_state[0, 0, :3, :3],
expected_slice_hidden_state,
atol=TOLERANCE,
rtol=TOLERANCE,
)
expected_slice_hidden_state = torch.tensor(
[[0.8973, 1.1847, 1.1776], [1.1934, 1.5040, 1.5128], [1.1153, 1.4486, 1.4951]]
).to(torch_device)
self.assertTrue(
torch.allclose(
outputs.pixel_decoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE
)
expectations = Expectations(
{
(None, None): [
[0.8973, 1.1847, 1.1776],
[1.1934, 1.5040, 1.5128],
[1.1153, 1.4486, 1.4951],
],
("cuda", 8): [
[0.8974, 1.1848, 1.1777],
[1.1933, 1.5041, 1.5128],
[1.1154, 1.4487, 1.4950],
],
}
)
expected_slice_hidden_state = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.pixel_decoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE) # fmt: skip
expected_slice_hidden_state = torch.tensor(
[[2.1152, 1.7000, -0.8603], [1.5808, 1.8004, -0.9353], [1.6043, 1.7495, -0.5999]]
).to(torch_device)
self.assertTrue(
torch.allclose(
outputs.transformer_decoder_last_hidden_state[0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE
)
expectations = Expectations(
{
(None, None): [
[2.1152, 1.7000, -0.8603],
[1.5808, 1.8004, -0.9353],
[1.6043, 1.7495, -0.5999],
],
("cuda", 8): [
[2.1153, 1.7004, -0.8604],
[1.5807, 1.8007, -0.9354],
[1.6040, 1.7498, -0.6001],
],
}
)
expected_slice_hidden_state = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.transformer_decoder_last_hidden_state[0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE) # fmt: skip
def test_inference_universal_segmentation_head(self):
model = Mask2FormerForUniversalSegmentation.from_pretrained(self.model_checkpoints).to(torch_device).eval()
@ -482,23 +504,40 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
self.assertEqual(
masks_queries_logits.shape, (1, model.config.num_queries, inputs_shape[-2] // 4, inputs_shape[-1] // 4)
)
expected_slice = [
[-8.7839, -9.0056, -8.8121],
[-7.4104, -7.0313, -6.5401],
[-6.6105, -6.3427, -6.4675],
]
expected_slice = torch.tensor(expected_slice).to(torch_device)
expectations = Expectations(
{
(None, None): [
[-8.7839, -9.0056, -8.8121],
[-7.4104, -7.0313, -6.5401],
[-6.6105, -6.3427, -6.4675],
],
("cuda", 8): [
[-8.7809, -9.0041, -8.8087],
[-7.4075, -7.0307, -6.5385],
[-6.6088, -6.3417, -6.4627],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(masks_queries_logits[0, 0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE)
# class_queries_logits
class_queries_logits = outputs.class_queries_logits
self.assertEqual(class_queries_logits.shape, (1, model.config.num_queries, model.config.num_labels + 1))
expected_slice = torch.tensor(
[
[1.8324, -8.0835, -4.1922],
[0.8450, -9.0050, -3.6053],
[0.3045, -7.7293, -3.0275],
]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[1.8324, -8.0835, -4.1922],
[0.8450, -9.0050, -3.6053],
[0.3045, -7.7293, -3.0275],
],
("cuda", 8): [
[1.8326, -8.0834, -4.1916],
[0.8446, -9.0048, -3.6048],
[0.3042, -7.7296, -3.0277],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(
outputs.class_queries_logits[0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE
)
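A quick aside on the assertion change made throughout this file: `self.assertTrue(torch.allclose(...))` only reports a bare "False is not true" on failure, while `torch.testing.assert_close` raises with the mismatching elements and takes explicit `rtol`/`atol`. A minimal sketch of the difference, using plain PyTorch (not taken from the commit):

import torch

actual = torch.tensor([1.0000, 2.0000, 3.0005])
expected = torch.tensor([1.0, 2.0, 3.0])

# torch.allclose just returns a bool; wrapped in assertTrue, a failure says
# nothing about which element was off or by how much.
print(torch.allclose(actual, expected, atol=1e-4))  # False

# torch.testing.assert_close raises AssertionError and reports the observed
# absolute/relative differences, which is what the failure logs here rely on.
torch.testing.assert_close(actual, expected, atol=1e-3, rtol=1e-3)  # passes
torch.testing.assert_close(actual, expected, atol=1e-4, rtol=0.0)   # raises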

View File

@ -21,6 +21,7 @@ import numpy as np
from tests.test_modeling_common import floats_tensor
from transformers import DetrConfig, MaskFormerConfig, SwinConfig, is_torch_available, is_vision_available
from transformers.testing_utils import (
Expectations,
require_timm,
require_torch,
require_torch_accelerator,
@ -478,7 +479,7 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
self.assertEqual(model.model.pixel_level_module.encoder.out_indices, [1, 2, 3])
TOLERANCE = 1e-4
TOLERANCE = 2e-4
# We will verify our results on an image of cute cats
@ -513,31 +514,43 @@ class MaskFormerModelIntegrationTest(unittest.TestCase):
outputs = model(**inputs)
expected_slice_hidden_state = torch.tensor(
[[-0.0482, 0.9228, 0.4951], [-0.2547, 0.8017, 0.8527], [-0.0069, 0.3385, -0.0089]]
[
[-0.0482, 0.9228, 0.4951],
[-0.2547, 0.8017, 0.8527],
[-0.0069, 0.3385, -0.0089],
]
).to(torch_device)
self.assertTrue(
torch.allclose(
outputs.encoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE
)
)
torch.testing.assert_close(outputs.encoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE) # fmt: skip
expected_slice_hidden_state = torch.tensor(
[[-0.8422, -0.8434, -0.9718], [-1.0144, -0.5565, -0.4195], [-1.0038, -0.4484, -0.1961]]
).to(torch_device)
self.assertTrue(
torch.allclose(
outputs.pixel_decoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE
)
expectations = Expectations(
{
(None, None): [[-0.8422, -0.8434, -0.9718], [-1.0144, -0.5565, -0.4195], [-1.0038, -0.4484, -0.1961]],
("cuda", 8): [
[-0.8422, -0.8435, -0.9717],
[-1.0145, -0.5564, -0.4195],
[-1.0040, -0.4486, -0.1962],
],
}
)
expected_slice_hidden_state = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.pixel_decoder_last_hidden_state[0, 0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE) # fmt: skip
expected_slice_hidden_state = torch.tensor(
[[0.2852, -0.0159, 0.9735], [0.6254, 0.1858, 0.8529], [-0.0680, -0.4116, 1.8413]]
).to(torch_device)
self.assertTrue(
torch.allclose(
outputs.transformer_decoder_last_hidden_state[0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE
)
expectations = Expectations(
{
(None, None): [
[0.2852, -0.0159, 0.9735],
[0.6254, 0.1858, 0.8529],
[-0.0680, -0.4116, 1.8413],
],
("cuda", 8): [
[0.2853, -0.0162, 0.9736],
[0.6256, 0.1856, 0.8530],
[-0.0679, -0.4118, 1.8416],
],
}
)
expected_slice_hidden_state = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.transformer_decoder_last_hidden_state[0, :3, :3], expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE) # fmt: skip
def test_inference_instance_segmentation_head(self):
model = (
@ -562,25 +575,42 @@ class MaskFormerModelIntegrationTest(unittest.TestCase):
masks_queries_logits.shape,
(1, model.config.decoder_config.num_queries, inputs_shape[-2] // 4, inputs_shape[-1] // 4),
)
expected_slice = [
[-1.3737124, -1.7724937, -1.9364233],
[-1.5977281, -1.9867939, -2.1523695],
[-1.5795398, -1.9269832, -2.093942],
]
expected_slice = torch.tensor(expected_slice).to(torch_device)
expectations = Expectations(
{
(None, None): [
[-1.3737124, -1.7724937, -1.9364233],
[-1.5977281, -1.9867939, -2.1523695],
[-1.5795398, -1.9269832, -2.093942],
],
("cuda", 8): [
[-1.3737, -1.7727, -1.9367],
[-1.5979, -1.9871, -2.1527],
[-1.5797, -1.9271, -2.0941],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(masks_queries_logits[0, 0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE)
# class_queries_logits
class_queries_logits = outputs.class_queries_logits
self.assertEqual(
class_queries_logits.shape, (1, model.config.decoder_config.num_queries, model.config.num_labels + 1)
)
expected_slice = torch.tensor(
[
[1.6512e00, -5.2572e00, -3.3519e00],
[3.6169e-02, -5.9025e00, -2.9313e00],
[1.0766e-04, -7.7630e00, -5.1263e00],
]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[1.6512e00, -5.2572e00, -3.3519e00],
[3.6169e-02, -5.9025e00, -2.9313e00],
[1.0766e-04, -7.7630e00, -5.1263e00],
],
("cuda", 8): [
[1.6507e00, -5.2568e00, -3.3520e00],
[3.5767e-02, -5.9023e00, -2.9313e00],
[-6.2712e-04, -7.7627e00, -5.1268e00],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(
outputs.class_queries_logits[0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE
)
@ -608,17 +638,34 @@ class MaskFormerModelIntegrationTest(unittest.TestCase):
masks_queries_logits.shape,
(1, model.config.decoder_config.num_queries, inputs_shape[-2] // 4, inputs_shape[-1] // 4),
)
expected_slice = [[-0.9046, -2.6366, -4.6062], [-3.4179, -5.7890, -8.8057], [-4.9179, -7.6560, -10.7711]]
expected_slice = torch.tensor(expected_slice).to(torch_device)
expectations = Expectations(
{
(None, None): [[-0.9046, -2.6366, -4.6062], [-3.4179, -5.7890, -8.8057], [-4.9179, -7.6560, -10.7711]],
("cuda", 8): [[-0.9000, -2.6283, -4.5964], [-3.4123, -5.7789, -8.7919], [-4.9132, -7.6444, -10.7557]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(masks_queries_logits[0, 0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE)
# class_queries_logits
class_queries_logits = outputs.class_queries_logits
self.assertEqual(
class_queries_logits.shape, (1, model.config.decoder_config.num_queries, model.config.num_labels + 1)
)
expected_slice = torch.tensor(
[[4.7188, -3.2585, -2.8857], [6.6871, -2.9181, -1.2487], [7.2449, -2.2764, -2.1874]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[4.7188, -3.2585, -2.8857],
[6.6871, -2.9181, -1.2487],
[7.2449, -2.2764, -2.1874],
],
("cuda", 8): [
[4.7177, -3.2586, -2.8853],
[6.6845, -2.9186, -1.2491],
[7.2443, -2.2760, -2.1858],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(
outputs.class_queries_logits[0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE
)

View File

@ -140,6 +140,9 @@ class MgpstrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_batching_equivalence(self, atol=1e-4, rtol=1e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="MgpstrModel does not use inputs_embeds")
def test_inputs_embeds(self):
pass
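Several model suites in this commit (MGP-STR, PoolFormer, PVT, PVT-v2, RegNet, SegFormer) add the same one-line override shown above, which only loosens the tolerances of the shared batching-equivalence check. A minimal sketch of the mechanism, with hypothetical class names (unittest invokes test methods without arguments, so the keyword defaults are what actually take effect):

import unittest

class CommonModelTest(unittest.TestCase):
    def test_batching_equivalence(self, atol=1e-5, rtol=1e-5):
        # Stand-in for the shared check that batched and unbatched forward
        # passes agree within atol/rtol.
        self.assertTrue(atol > 0 and rtol > 0)

class LooserModelTest(CommonModelTest):
    # Overriding only the keyword defaults relaxes the tolerance for this
    # model while reusing the base comparison logic unchanged.
    def test_batching_equivalence(self, atol=1e-4, rtol=1e-4):
        super().test_batching_equivalence(atol=atol, rtol=rtol)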

View File

@ -20,6 +20,7 @@ import pytest
from transformers import MiniMaxConfig, is_torch_available
from transformers.cache_utils import Cache
from transformers.testing_utils import (
Expectations,
require_flash_attn,
require_torch,
require_torch_accelerator,
@ -250,15 +251,20 @@ class MiniMaxIntegrationTest(unittest.TestCase):
model_id,
torch_dtype=torch.bfloat16,
).to(torch_device)
expected_slice = torch.tensor(
[[1.0312, -0.5156, -0.3262], [-0.1152, 0.4336, 0.2412], [1.2188, -0.5898, -0.0381]]
).to(torch_device)
with torch.no_grad():
logits = model(dummy_input).logits
logits = logits.float()
expectations = Expectations(
{
(None, None): [[1.0312, -0.5156, -0.3262], [-0.1152, 0.4336, 0.2412], [1.2188, -0.5898, -0.0381]],
("cuda", 8): [[1.0312, -0.5156, -0.3203], [-0.1201, 0.4375, 0.2402], [1.2188, -0.5898, -0.0396]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(logits[0, :3, :3], expected_slice, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(logits[1, :3, :3], expected_slice, atol=1e-3, rtol=1e-3)

View File

@ -191,27 +191,26 @@ class MixtralIntegrationTest(unittest.TestCase):
# ("cuda", 8) for A100/A10, and ("cuda", 7) for T4.
#
# considering differences in hardware processing and potential deviations in generated text.
# fmt: off
EXPECTED_LOGITS_LEFT_UNPADDED = Expectations(
{
("xpu", 3): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7070, 0.2461]]).to(torch_device),
("cuda", 7): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2275, 0.6054], [0.2656, -0.7070, 0.2460]]).to(torch_device),
("cuda", 8): torch.Tensor([[0.2207, 0.5234, -0.3828], [0.8203, -0.2285, 0.6055], [0.2656, -0.7109, 0.2451]]).to(torch_device),
("rocm", 9): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to(torch_device),
("xpu", 3): [[0.2236, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7070, 0.2461]],
("cuda", 7): [[0.2236, 0.5195, -0.3828], [0.8203, -0.2275, 0.6054], [0.2656, -0.7070, 0.2460]],
("cuda", 8): [[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]],
("rocm", 9): [[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]],
}
)
expected_left_unpadded = EXPECTED_LOGITS_LEFT_UNPADDED.get_expectation()
expected_left_unpadded = torch.tensor(EXPECTED_LOGITS_LEFT_UNPADDED.get_expectation(), device=torch_device)
EXPECTED_LOGITS_RIGHT_UNPADDED = Expectations(
{
("xpu", 3): torch.Tensor([[0.2178, 0.1270, -0.1641], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(torch_device),
("cuda", 7): torch.Tensor([[0.2167, 0.1269, -0.1640], [-0.3496, 0.2988, -1.0312], [0.0688, 0.7929, 0.8007]]).to(torch_device),
("cuda", 8): torch.Tensor([[0.2178, 0.1270, -0.1621], [-0.3496, 0.3008, -1.0312], [0.0693, 0.7930, 0.7969]]).to(torch_device),
("rocm", 9): torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to(torch_device),
("xpu", 3): [[0.2178, 0.1270, -0.1641], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]],
("cuda", 7): [[0.2167, 0.1269, -0.1640], [-0.3496, 0.2988, -1.0312], [0.0688, 0.7929, 0.8007]],
("cuda", 8): [[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]],
("rocm", 9): [[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]],
}
)
expected_right_unpadded = EXPECTED_LOGITS_RIGHT_UNPADDED.get_expectation()
# fmt: on
expected_right_unpadded = torch.tensor(EXPECTED_LOGITS_RIGHT_UNPADDED.get_expectation(), device=torch_device)
with torch.no_grad():
logits = model(dummy_input, attention_mask=attention_mask).logits
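The `Expectations` helper used across these tests maps `(device_type, version)` keys, e.g. `("cuda", 8)`, `("rocm", (9, 5))`, or the generic `(None, None)` fallback, to hardware-specific reference values, and `get_expectation()` picks the entry matching the current accelerator. A rough, simplified re-implementation of that lookup, for illustration only (the real helper lives in `transformers.testing_utils` and its exact matching rules may differ):

import torch

def pick_expectation(table, device_type, major):
    # Prefer an exact (device, version) match, then (device, None),
    # then the generic (None, None) default.
    for key in ((device_type, major), (device_type, None), (None, None)):
        if key in table:
            return table[key]
    raise KeyError(f"no expectation for device={device_type}, version={major}")

table = {
    (None, None): [[0.2236, 0.5195, -0.3828]],
    ("cuda", 7): [[0.2236, 0.5195, -0.3828]],
    ("cuda", 8): [[0.2217, 0.5195, -0.3828]],
}
if torch.cuda.is_available():
    device_type, major = "cuda", torch.cuda.get_device_capability()[0]
else:
    device_type, major = None, None
expected = torch.tensor(pick_expectation(table, device_type, major))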

View File

@ -16,7 +16,7 @@
import unittest
from transformers import MobileNetV1Config
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -246,6 +246,12 @@ class MobileNetV1ModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1001))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-4.1739, -1.1233, 3.1205]).to(torch_device)
expectations = Expectations(
{
(None, None): [-4.1739, -1.1233, 3.1205],
("cuda", 8): [-4.1725, -1.1238, 3.1191],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -16,7 +16,7 @@
import unittest
from transformers import MobileNetV2Config
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -301,9 +301,15 @@ class MobileNetV2ModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1001))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([0.2445, -1.1993, 0.1905]).to(torch_device)
expectations = Expectations(
{
(None, None): [0.2445, -1.1993, 0.1905],
("cuda", 8): [0.2445, -1.1970, 0.1868],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_semantic_segmentation(self):
@ -324,13 +330,20 @@ class MobileNetV2ModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 21, 65, 65))
self.assertEqual(logits.shape, expected_shape)
expected_slice = torch.tensor(
[
[[17.5790, 17.7581, 18.3355], [18.3257, 18.4230, 18.8973], [18.6169, 18.8650, 19.2187]],
[[-2.1595, -2.0977, -2.3741], [-2.4226, -2.3028, -2.6835], [-2.7819, -2.5991, -2.7706]],
[[4.2058, 4.8317, 4.7638], [4.4136, 5.0361, 4.9383], [4.5028, 4.9644, 4.8734]],
],
device=torch_device,
expectations = Expectations(
{
(None, None): [
[[17.5790, 17.7581, 18.3355], [18.3257, 18.4230, 18.8973], [18.6169, 18.8650, 19.2187]],
[[-2.1595, -2.0977, -2.3741], [-2.4226, -2.3028, -2.6835], [-2.7819, -2.5991, -2.7706]],
[[4.2058, 4.8317, 4.7638], [4.4136, 5.0361, 4.9383], [4.5028, 4.9644, 4.8734]],
],
("cuda", 8): [
[[17.5809, 17.7571, 18.3341], [18.3240, 18.4216, 18.8974], [18.6174, 18.8662, 19.2177]],
[[-2.1562, -2.0942, -2.3703], [-2.4199, -2.2999, -2.6818], [-2.7800, -2.5944, -2.7678]],
[[4.2092, 4.8356, 4.7694], [4.4181, 5.0401, 4.9409], [4.5089, 4.9700, 4.8802]],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -16,7 +16,7 @@
import unittest
from transformers import MobileViTConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -304,9 +304,15 @@ class MobileViTModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-1.9364, -1.2327, -0.4653]).to(torch_device)
expectations = Expectations(
{
(None, None): [-1.9364, -1.2327, -0.4653],
("cuda", 8): [-1.9401, -1.2384, -0.4702],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_semantic_segmentation(self):
@ -327,16 +333,23 @@ class MobileViTModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 21, 32, 32))
self.assertEqual(logits.shape, expected_shape)
expected_slice = torch.tensor(
[
[[6.9713, 6.9786, 7.2422], [7.2893, 7.2825, 7.4446], [7.6580, 7.8797, 7.9420]],
[[-10.6869, -10.3250, -10.3471], [-10.4228, -9.9868, -9.7132], [-11.0405, -11.0221, -10.7318]],
[[-3.3089, -2.8539, -2.6740], [-3.2706, -2.5621, -2.5108], [-3.2534, -2.6615, -2.6651]],
],
device=torch_device,
expectations = Expectations(
{
(None, None): [
[[6.9713, 6.9786, 7.2422], [7.2893, 7.2825, 7.4446], [7.6580, 7.8797, 7.9420]],
[[-10.6869, -10.3250, -10.3471], [-10.4228, -9.9868, -9.7132], [-11.0405, -11.0221, -10.7318]],
[[-3.3089, -2.8539, -2.6740], [-3.2706, -2.5621, -2.5108], [-3.2534, -2.6615, -2.6651]],
],
("cuda", 8): [
[[6.9661, 6.9753, 7.2386], [7.2864, 7.2785, 7.4429], [7.6577, 7.8770, 7.9387]],
[[-10.7046, -10.3411, -10.3641], [-10.4402, -10.0004, -9.7269], [-11.0579, -11.0358, -10.7459]],
[[-3.3022, -2.8465, -2.6661], [-3.2654, -2.5542, -2.5055], [-3.2477, -2.6544, -2.6562]],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_post_processing_semantic_segmentation(self):

View File

@ -16,7 +16,14 @@
import unittest
from transformers import MobileViTV2Config
from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device
from transformers.testing_utils import (
Expectations,
require_torch,
require_torch_multi_gpu,
require_vision,
slow,
torch_device,
)
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -317,9 +324,15 @@ class MobileViTV2ModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-1.6336e00, -7.3204e-02, -5.1883e-01]).to(torch_device)
expectations = Expectations(
{
(None, None): [-1.6336e00, -7.3204e-02, -5.1883e-01],
("cuda", 8): [-1.6341, -0.0665, -0.5158],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_semantic_segmentation(self):
@ -340,16 +353,23 @@ class MobileViTV2ModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 21, 32, 32))
self.assertEqual(logits.shape, expected_shape)
expected_slice = torch.tensor(
[
[[7.0863, 7.1525, 6.8201], [6.6931, 6.8770, 6.8933], [6.2978, 7.0366, 6.9636]],
[[-3.7134, -3.6712, -3.6675], [-3.5825, -3.3549, -3.4777], [-3.3435, -3.3979, -3.2857]],
[[-2.9329, -2.8003, -2.7369], [-3.0564, -2.4780, -2.0207], [-2.6889, -1.9298, -1.7640]],
],
device=torch_device,
expectations = Expectations(
{
(None, None): [
[[7.0863, 7.1525, 6.8201], [6.6931, 6.8770, 6.8933], [6.2978, 7.0366, 6.9636]],
[[-3.7134, -3.6712, -3.6675], [-3.5825, -3.3549, -3.4777], [-3.3435, -3.3979, -3.2857]],
[[-2.9329, -2.8003, -2.7369], [-3.0564, -2.4780, -2.0207], [-2.6889, -1.9298, -1.7640]],
],
("cuda", 8): [
[[7.0866, 7.1509, 6.8188], [6.6935, 6.8757, 6.8927], [6.2988, 7.0365, 6.9631]],
[[-3.7113, -3.6686, -3.6643], [-3.5801, -3.3516, -3.4739], [-3.3432, -3.3966, -3.2832]],
[[-2.9359, -2.8037, -2.7387], [-3.0595, -2.4798, -2.0222], [-2.6901, -1.9306, -1.7659]],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(logits[0, :3, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_post_processing_semantic_segmentation(self):

View File

@ -17,7 +17,7 @@ import copy
import unittest
from transformers import MoonshineConfig, is_torch_available
from transformers.testing_utils import cleanup, require_torch, slow, torch_device
from transformers.testing_utils import Expectations, cleanup, require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import (
@ -457,13 +457,15 @@ class MoonshineModelIntegrationTests(unittest.TestCase):
outputs = model.generate(**inputs, max_new_tokens=1, return_dict_in_generate=True, output_logits=True)
# fmt: off
EXPECTED_LOGITS = torch.tensor([
-9.1106, 4.5542, 6.3892, -6.8139, -7.2456, -7.9074, -7.2839, -7.6043, -8.0384, -7.8351,
-7.3867, -7.2450, -7.7420, -7.3912, -7.3866, -7.6979, -7.6420, -7.0504, -7.3979, -7.2483,
-8.0796, -7.3300, -7.3672, -6.8765, -7.6876, -7.2682, -6.9866, -6.7457, -7.6855, -7.3050,
])
expectations = Expectations(
{
(None, None): [-9.1106, 4.5542, 6.3892, -6.8139, -7.2456, -7.9074, -7.2839, -7.6043, -8.0384, -7.8351, -7.3867, -7.2450, -7.7420, -7.3912, -7.3866, -7.6979, -7.6420, -7.0504, -7.3979, -7.2483, -8.0796, -7.3300, -7.3672, -6.8765, -7.6876, -7.2682, -6.9866, -6.7457, -7.6855, -7.3050],
("cuda", 8): [-9.1107, 4.5538, 6.3902, -6.8141, -7.2459, -7.9076, -7.2842, -7.6045, -8.0387, -7.8354, -7.3869, -7.2453, -7.7423, -7.3914, -7.3869, -7.6982, -7.6422, -7.0507, -7.3982, -7.2486, -8.0798, -7.3302, -7.3675, -6.8769, -7.6878, -7.2684, -6.9868, -6.7459, -7.6858, -7.3052],
}
)
EXPECTED_LOGITS = torch.tensor(expectations.get_expectation()).to(torch_device)
# fmt: on
torch.testing.assert_close(outputs.logits[0][0, :30].cpu(), EXPECTED_LOGITS, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0][0, :30], EXPECTED_LOGITS, rtol=2e-4, atol=2e-4)
@slow
def test_base_logits_single(self):
@ -476,7 +478,7 @@ class MoonshineModelIntegrationTests(unittest.TestCase):
# fmt: off
EXPECTED_LOGITS = torch.tensor([
-6.7336, 1.9482, 5.2448, -8.0277, -7.9167, -7.8956, -7.9649, -7.9348, -8.1312, -8.0616,
-8.1070, -7.7696, -7.8809, -7.9450, -8.1013, -7.8177, -7.8598, -7.8257, -7.8729, -7.9657,
-7.9310, -8.1024, -7.8699, -7.8231, -8.0752, -7.9764, -7.8127, -8.0536, -7.9492, -7.9290,
])
@ -493,9 +495,9 @@ class MoonshineModelIntegrationTests(unittest.TestCase):
outputs = model.generate(**inputs, max_new_tokens=1, return_dict_in_generate=True, output_logits=True)
# fmt: off
EXPECTED_LOGITS = torch.tensor([
[-8.0109, 5.0241, 4.5979, -6.8125, -7.1675, -7.8783, -7.2152, -7.5188, -7.9077, -7.7394],
[-4.4399, -1.4422, 6.6710, -6.8929, -7.3751, -7.0969, -6.5257, -7.0257, -7.2585, -7.0008],
[-10.0086, 3.2859, 0.7345, -6.5557, -6.8514, -6.5308, -6.4172, -6.9484, -6.6214, -6.6229],
[-10.8078, 4.0030, -0.0633, -5.0505, -5.3906, -5.4590, -5.2420, -5.4746, -5.2665, -5.3158]
])
# fmt: on
@ -512,10 +514,10 @@ class MoonshineModelIntegrationTests(unittest.TestCase):
# fmt: off
EXPECTED_LOGITS = torch.tensor([
[-7.7272, 1.4630, 5.2294, -7.7313, -7.6252, -7.6011, -7.6788, -7.6441, -7.8452, -7.7549],
[-6.2173, -0.5891, 7.9493, -7.0694, -6.9997, -6.9982, -7.0953, -7.0831, -7.1686, -7.0137],
[-7.3184, 3.1192, 3.8937, -5.7206, -5.8428, -5.7609, -5.9996, -5.8212, -5.8615, -5.8719],
[-9.5475, 1.0146, 4.1179, -5.9971, -6.0614, -6.0329, -6.2103, -6.0318, -6.0789, -6.0873]
])
# fmt: on

View File

@ -446,7 +446,8 @@ class MptIntegrationTests(unittest.TestCase):
input_text = "Hello"
expected_outputs = Expectations({
("cuda", None): "Hello, I'm a new user of the forum. I have a question about the \"Solaris",
(None, None): "Hello, I'm a new user of the forum. I have a question about the \"Solaris",
("cuda", 8): "Hello, I'm a new user of the forum. I have a question. I have a problem with",
("rocm", (9, 5)): "Hello, I'm a newbie to the forum. I have a question about the \"B\" in",
}) # fmt: off
expected_output = expected_outputs.get_expectation()
@ -468,10 +469,10 @@ class MptIntegrationTests(unittest.TestCase):
input_text = "Hello"
expected_outputs = Expectations({
(None, None): "Hello and welcome to the first episode of the new podcast, The Frugal Feminist.\n",
("rocm", (9, 5)): "Hello and welcome to the first day of the new release at The Stamp Man!\nToday we are",
("xpu", 3): "Hello and welcome to the first ever episode of the new and improved, and hopefully improved, podcast.\n",
("cuda", 7): "Hello and welcome to the first episode of the new podcast, The Frugal Feminist.\n",
("cuda", 8): "Hello and welcome to the first day of the new release countdown for the month of May!\nToday",
("cuda", 8): "Hello and welcome to the first ever episode of the new and improved, and hopefully improved, podcast.\n",
}) # fmt: off
expected_output = expected_outputs.get_expectation()
@ -499,13 +500,17 @@ class MptIntegrationTests(unittest.TestCase):
expected_outputs = Expectations(
{
(None, None): [
"Hello my name is Tiffany and I am a mother of two beautiful children. I have been a nanny for the",
"Today I am going at the gym and then I am going to go to the grocery store. I am going to buy some food and some",
],
("xpu", 3): [
"Hello my name is Tiffany. I am a mother of two beautiful children. I have been a nanny for over",
"Today I am going at the gym and then I am going to go to the mall with my mom. I am going to go to the",
],
("cuda", 7): [
"Hello my name is Tiffany and I am a mother of two beautiful children. I have been a nanny for the",
"Today I am going at the gym and then I am going to go to the grocery store. I am going to buy some food and some",
("cuda", 8): [
"Hello my name is Tiffany and I am a mother of two beautiful children. I have been a nanny for over",
"Today I am going at the gym and then I am going to go to the grocery store. I am going to make a list of things",
],
("rocm", (9, 5)): [
"Hello my name is Jasmine and I am a very sweet and loving dog. I am a very playful dog and I",
@ -534,8 +539,9 @@ class MptIntegrationTests(unittest.TestCase):
expected_slices = Expectations(
{
(None, None): torch.Tensor([-0.2520, -0.2178, -0.1953]),
("xpu", 3): torch.Tensor([-0.2090, -0.2061, -0.1465]),
("cuda", 7): torch.Tensor([-0.2520, -0.2178, -0.1953]),
("cuda", 8): torch.Tensor([-0.2559, -0.2227, -0.2217]),
# TODO: This is quite a bit off, check BnB
("rocm", (9, 5)): torch.Tensor([-0.3008, -0.1309, -0.1562]),
}

View File

@ -31,6 +31,7 @@ from transformers import (
T5Config,
)
from transformers.testing_utils import (
Expectations,
get_device_properties,
is_torch_available,
require_flash_attn,
@ -1377,16 +1378,17 @@ class MusicgenIntegrationTests(unittest.TestCase):
output_values = model.generate(**unconditional_inputs, do_sample=True, max_new_tokens=10)
# fmt: off
EXPECTED_VALUES = torch.tensor(
[
-0.0099, -0.0140, 0.0079, 0.0080, -0.0046, 0.0065, -0.0068, -0.0185,
0.0105, 0.0059, 0.0329, 0.0249, -0.0204, -0.0341, -0.0465, 0.0053,
]
expectations = Expectations(
{
(None, None): [-0.0099, -0.0140, 0.0079, 0.0080, -0.0046, 0.0065, -0.0068, -0.0185, 0.0105, 0.0059, 0.0329, 0.0249, -0.0204, -0.0341, -0.0465, 0.0053],
("cuda", 8): [-0.0099, -0.0140, 0.0079, 0.0080, -0.0046, 0.0065, -0.0068, -0.0185, 0.0105, 0.0058, 0.0328, 0.0249, -0.0205, -0.0342, -0.0466, 0.0052],
}
)
EXPECTED_VALUES = torch.tensor(expectations.get_expectation()).to(torch_device)
# fmt: on
self.assertTrue(output_values.shape == (2, 1, 4480))
torch.testing.assert_close(output_values[0, 0, :16].cpu(), EXPECTED_VALUES, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(output_values[0, 0, :16], EXPECTED_VALUES, rtol=2e-4, atol=2e-4)
@slow
def test_generate_text_prompt_greedy(self):
@ -1459,16 +1461,17 @@ class MusicgenIntegrationTests(unittest.TestCase):
)
# fmt: off
EXPECTED_VALUES = torch.tensor(
[
-0.0111, -0.0154, 0.0047, 0.0058, -0.0068, 0.0012, -0.0109, -0.0229,
0.0010, -0.0038, 0.0167, 0.0042, -0.0421, -0.0610, -0.0764, -0.0326,
]
expectations = Expectations(
{
(None, None): [-0.0111, -0.0154, 0.0047, 0.0058, -0.0068, 0.0012, -0.0109, -0.0229, 0.0010, -0.0038, 0.0167, 0.0042, -0.0421, -0.0610, -0.0764, -0.0326],
("cuda", 8): [-0.0110, -0.0153, 0.0048, 0.0058, -0.0068, 0.0012, -0.0109, -0.0229, 0.0010, -0.0037, 0.0168, 0.0042, -0.0420, -0.0609, -0.0763, -0.0326],
}
)
EXPECTED_VALUES = torch.tensor(expectations.get_expectation()).to(torch_device)
# fmt: on
self.assertTrue(output_values.shape == (2, 1, 4480))
torch.testing.assert_close(output_values[0, 0, :16].cpu(), EXPECTED_VALUES, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(output_values[0, 0, :16], EXPECTED_VALUES, rtol=2e-4, atol=2e-4)
@slow
def test_generate_text_audio_prompt(self):
@ -1521,13 +1524,13 @@ class MusicgenStereoIntegrationTests(unittest.TestCase):
# fmt: off
EXPECTED_VALUES_LEFT = torch.tensor(
[
0.0017, 0.0004, 0.0004, 0.0005, 0.0002, 0.0002, -0.0002, -0.0013,
-0.0010, -0.0015, -0.0018, -0.0032, -0.0060, -0.0082, -0.0096, -0.0099,
]
)
EXPECTED_VALUES_RIGHT = torch.tensor(
[
0.0038, 0.0028, 0.0031, 0.0032, 0.0031, 0.0032, 0.0030, 0.0019,
0.0021, 0.0015, 0.0009, -0.0008, -0.0040, -0.0067, -0.0087, -0.0096,
]
)
@ -1555,13 +1558,13 @@ class MusicgenStereoIntegrationTests(unittest.TestCase):
# fmt: off
EXPECTED_VALUES_LEFT = torch.tensor(
[
0.2535, 0.2008, 0.1471, 0.0896, 0.0306, -0.0200, -0.0501, -0.0728,
-0.0832, -0.0856, -0.0867, -0.0884, -0.0864, -0.0866, -0.0744, -0.0430,
]
)
EXPECTED_VALUES_RIGHT = torch.tensor(
[
0.1695, 0.1213, 0.0732, 0.0239, -0.0264, -0.0705, -0.0935, -0.1103,
-0.1163, -0.1139, -0.1104, -0.1082, -0.1027, -0.1004, -0.0900, -0.0614,
]
)

View File

@ -30,6 +30,7 @@ from transformers import (
T5Config,
)
from transformers.testing_utils import (
Expectations,
get_device_properties,
is_torch_available,
is_torchaudio_available,
@ -1472,16 +1473,17 @@ class MusicgenMelodyIntegrationTests(unittest.TestCase):
)
# fmt: off
EXPECTED_VALUES = torch.tensor(
[
-0.0165, -0.0222, -0.0041, -0.0058, -0.0145, -0.0023, -0.0160, -0.0310,
-0.0055, -0.0127, 0.0104, 0.0105, -0.0326, -0.0611, -0.0744, -0.0083
]
expectations = Expectations(
{
(None, None): [-0.0165, -0.0222, -0.0041, -0.0058, -0.0145, -0.0023, -0.0160, -0.0310, -0.0055, -0.0127, 0.0104, 0.0105, -0.0326, -0.0611, -0.0744, -0.0083],
("cuda", 8): [-0.0165, -0.0221, -0.0040, -0.0058, -0.0145, -0.0024, -0.0160, -0.0310, -0.0055, -0.0127, 0.0104, 0.0105, -0.0326, -0.0612, -0.0744, -0.0082],
}
)
EXPECTED_VALUES = torch.tensor(expectations.get_expectation()).to(torch_device)
# fmt: on
self.assertTrue(output_values.shape == (2, 1, 4480))
torch.testing.assert_close(output_values[0, 0, :16].cpu(), EXPECTED_VALUES, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(output_values[0, 0, :16], EXPECTED_VALUES, rtol=2e-4, atol=2e-4)
@slow
def test_generate_text_audio_prompt(self):

View File

@ -21,6 +21,7 @@ import numpy as np
from tests.test_modeling_common import floats_tensor
from transformers import AutoModelForImageClassification, OneFormerConfig, is_torch_available, is_vision_available
from transformers.testing_utils import (
Expectations,
is_flaky,
require_timm,
require_torch,
@ -528,7 +529,7 @@ class OneFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
self.assertEqual(model.model.pixel_level_module.encoder.out_indices, [1, 2, 3])
TOLERANCE = 1e-4
TOLERANCE = 2e-4
# We will verify our results on an image of cute cats
@ -574,12 +575,15 @@ class OneFormerModelIntegrationTest(unittest.TestCase):
slice_hidden_state = outputs.pixel_decoder_hidden_states[0][0, 0, :3, :3]
torch.testing.assert_close(slice_hidden_state, expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE)
# fmt: off
expected_slice_hidden_state = [[3.0668, -1.1833, -5.1103], [3.344, -3.362, -5.1101], [2.6017, -4.3613, -4.1444]]
expected_slice_hidden_state = torch.tensor(expected_slice_hidden_state).to(torch_device)
expectations = Expectations(
{
(None, None): [[3.0668, -1.1833, -5.1103], [3.344, -3.362, -5.1101], [2.6017, -4.3613, -4.1444]],
("cuda", 8): [[3.0590, -1.1903, -5.1119], [3.3919, -3.3547, -5.1469], [2.6041, -4.3592, -4.1406]],
}
)
expected_slice_hidden_state = torch.tensor(expectations.get_expectation()).to(torch_device)
slice_hidden_state = outputs.transformer_decoder_class_predictions[0, :3, :3]
torch.testing.assert_close(slice_hidden_state, expected_slice_hidden_state, atol=TOLERANCE, rtol=TOLERANCE)
# fmt: on
def test_inference_universal_segmentation_head(self):
model = OneFormerForUniversalSegmentation.from_pretrained(self.model_checkpoints).to(torch_device).eval()
@ -599,8 +603,13 @@ class OneFormerModelIntegrationTest(unittest.TestCase):
masks_queries_logits.shape,
(1, model.config.num_queries, inputs_shape[-2] // 4, (inputs_shape[-1] + 2) // 4),
)
expected_slice = [[3.1848, 4.2141, 4.1993], [2.9000, 3.5721, 3.6603], [2.5358, 3.0883, 3.6168]]
expected_slice = torch.tensor(expected_slice).to(torch_device)
expectations = Expectations(
{
(None, None): [[3.1848, 4.2141, 4.1993], [2.9000, 3.5721, 3.6603], [2.5358, 3.0883, 3.6168]],
("cuda", 8): [[3.1687, 4.1893, 4.1742], [2.8768, 3.5380, 3.6257], [2.5121, 3.0552, 3.5822]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(masks_queries_logits[0, 0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE)
# class_queries_logits
@ -609,8 +618,13 @@ class OneFormerModelIntegrationTest(unittest.TestCase):
class_queries_logits.shape,
(1, model.config.num_queries, model.config.num_labels + 1),
)
expected_slice = [[3.0668, -1.1833, -5.1103], [3.3440, -3.3620, -5.1101], [2.6017, -4.3613, -4.1444]]
expected_slice = torch.tensor(expected_slice).to(torch_device)
expectations = Expectations(
{
(None, None): [[3.0668, -1.1833, -5.1103], [3.3440, -3.3620, -5.1101], [2.6017, -4.3613, -4.1444]],
("cuda", 8): [[3.0590, -1.1903, -5.1119], [3.3919, -3.3547, -5.1469], [2.6041, -4.3592, -4.1406]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(class_queries_logits[0, :3, :3], expected_slice, rtol=TOLERANCE, atol=TOLERANCE)
@require_torch_accelerator

View File

@ -17,7 +17,7 @@ import unittest
from transformers import is_torch_available, is_vision_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
@ -144,6 +144,9 @@ class PoolFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_batching_equivalence(self, atol=2e-4, rtol=2e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="PoolFormer does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@ -235,5 +238,11 @@ class PoolFormerModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-0.6113, 0.1685, -0.0492]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [-0.6113, 0.1685, -0.0492],
("cuda", 8): [-0.6112, 0.1690, -0.0481],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -17,6 +17,7 @@ import unittest
from transformers import is_torch_available, is_vision_available
from transformers.testing_utils import (
Expectations,
require_accelerate,
require_torch,
require_torch_accelerator,
@ -153,6 +154,9 @@ class PvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
self.model_tester = PvtModelTester(self)
self.config_tester = PvtConfigTester(self, config_class=PvtConfig)
def test_batching_equivalence(self, atol=1e-4, rtol=1e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
def test_config(self):
self.config_tester.run_common_tests()
@ -257,9 +261,15 @@ class PvtModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-1.4192, -1.9158, -0.9702]).to(torch_device)
expectations = Expectations(
{
(None, None): [-1.4192, -1.9158, -0.9702],
("cuda", 8): [-1.4194, -1.9161, -0.9705],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_model(self):
@ -278,11 +288,15 @@ class PvtModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 50, 512))
self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
expected_slice = torch.tensor(
[[-0.3086, 1.0402, 1.1816], [-0.2880, 0.5781, 0.6124], [0.1480, 0.6129, -0.0590]]
).to(torch_device)
expectations = Expectations(
{
(None, None): [[-0.3086, 1.0402, 1.1816], [-0.2880, 0.5781, 0.6124], [0.1480, 0.6129, -0.0590]],
("cuda", 8): [[-0.3084, 1.0402, 1.1816], [-0.2883, 0.5781, 0.6123], [0.1487, 0.6119, -0.0584]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
@require_accelerate

View File

@ -167,6 +167,9 @@ class PvtV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_batching_equivalence(self, atol=5e-4, rtol=5e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="Pvt-V2 does not use inputs_embeds")
def test_inputs_embeds(self):
pass

View File

@ -17,7 +17,7 @@ import unittest
from transformers import RegNetConfig
from transformers.file_utils import cached_property, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
@ -146,6 +146,9 @@ class RegNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_config(self):
self.config_tester.run_common_tests()
def test_batching_equivalence(self, atol=3e-5, rtol=3e-5):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="RegNet does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@ -248,6 +251,11 @@ class RegNetModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-0.4180, -1.5051, -3.4836]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [-0.4180, -1.5051, -3.4836],
("cuda", 8): [-0.4168, -1.5056, -3.4836],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -16,7 +16,7 @@
import unittest
from transformers import ResNetConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_backbone_common import BackboneTesterMixin
@ -301,9 +301,14 @@ class ResNetModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-11.1069, -9.7877, -8.3777]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [-11.1069, -9.7877, -8.3777],
("cuda", 8): [-11.1112, -9.7916, -8.3788],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@require_torch

View File

@ -29,6 +29,7 @@ from transformers import (
is_vision_available,
)
from transformers.testing_utils import (
Expectations,
require_torch,
require_torch_accelerator,
require_vision,
@ -732,45 +733,69 @@ class RTDetrModelIntegrationTest(unittest.TestCase):
expected_shape_logits = torch.Size((1, 300, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_logits = torch.tensor(
[
[-4.64763879776001, -5.001153945922852, -4.978509902954102],
[-4.159348487854004, -4.703853607177734, -5.946484565734863],
[-4.437461853027344, -4.65836238861084, -6.235235691070557],
]
).to(torch_device)
expected_boxes = torch.tensor(
[
[0.1688060760498047, 0.19992263615131378, 0.21225441992282867],
[0.768376350402832, 0.41226309537887573, 0.4636859893798828],
[0.25953856110572815, 0.5483334064483643, 0.4777486026287079],
]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[-4.64763879776001, -5.001153945922852, -4.978509902954102],
[-4.159348487854004, -4.703853607177734, -5.946484565734863],
[-4.437461853027344, -4.65836238861084, -6.235235691070557],
],
("cuda", 8): [[-4.6471, -5.0008, -4.9786], [-4.1599, -4.7041, -5.9458], [-4.4374, -4.6582, -6.2340]],
}
)
expected_logits = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [
[0.1688060760498047, 0.19992263615131378, 0.21225441992282867],
[0.768376350402832, 0.41226309537887573, 0.4636859893798828],
[0.25953856110572815, 0.5483334064483643, 0.4777486026287079],
],
("cuda", 8): [[0.1688, 0.1999, 0.2123], [0.7684, 0.4123, 0.4637], [0.2596, 0.5483, 0.4777]],
}
)
expected_boxes = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, rtol=2e-4, atol=2e-4)
expected_shape_boxes = torch.Size((1, 300, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, rtol=2e-4, atol=2e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.0, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor(
[0.9703017473220825, 0.9599503874778748, 0.9575679302215576, 0.9506784677505493], device=torch_device
)
expected_labels = [57, 15, 15, 65]
expected_slice_boxes = torch.tensor(
[
[0.13774872, 0.37821293, 640.13074, 476.21088],
[343.38132, 24.276838, 640.1404, 371.49573],
[13.225126, 54.179348, 318.98422, 472.2207],
[40.114475, 73.44104, 175.9573, 118.48469],
],
device=torch_device,
)
torch.testing.assert_close(results["scores"][:4], expected_scores, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [0.9703017473220825, 0.9599503874778748, 0.9575679302215576, 0.9506784677505493],
("cuda", 8): [0.9704, 0.9599, 0.9576, 0.9507],
}
)
expected_scores = torch.tensor(expectations.get_expectation()).to(torch_device)
expected_labels = [57, 15, 15, 65]
expectations = Expectations(
{
(None, None): [
[0.13774872, 0.37821293, 640.13074, 476.21088],
[343.38132, 24.276838, 640.1404, 371.49573],
[13.225126, 54.179348, 318.98422, 472.2207],
[40.114475, 73.44104, 175.9573, 118.48469],
],
("cuda", 8): [
[1.4183e-01, 3.8063e-01, 6.4013e02, 4.7621e02],
[3.4338e02, 2.4275e01, 6.4014e02, 3.7150e02],
[1.3236e01, 5.4179e01, 3.1899e02, 4.7222e02],
[4.0114e01, 7.3441e01, 1.7596e02, 1.1848e02],
],
}
)
expected_slice_boxes = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(results["scores"][:4], expected_scores, rtol=2e-4, atol=2e-4)
self.assertSequenceEqual(results["labels"][:4].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][:4], expected_slice_boxes, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(results["boxes"][:4], expected_slice_boxes, rtol=2e-4, atol=2e-4)

View File

@ -28,6 +28,7 @@ from transformers import (
is_vision_available,
)
from transformers.testing_utils import (
Expectations,
require_torch,
require_torch_accelerator,
require_vision,
@ -736,42 +737,60 @@ class RTDetrV2ModelIntegrationTest(unittest.TestCase):
expected_shape_logits = torch.Size((1, 300, model.config.num_labels))
self.assertEqual(outputs.logits.shape, expected_shape_logits)
expected_logits = torch.tensor(
[
[-3.7047, -5.1914, -6.1787],
[-4.0108, -9.3449, -5.2047],
[-4.1287, -4.7461, -5.8633],
]
).to(torch_device)
expected_boxes = torch.tensor(
[
[0.2582, 0.5497, 0.4764],
[0.1684, 0.1985, 0.2120],
[0.7665, 0.4146, 0.4669],
]
).to(torch_device)
expectations = Expectations(
{
(None, None): [[-3.7047, -5.1914, -6.1787], [-4.0108, -9.3449, -5.2047], [-4.1287, -4.7461, -5.8633]],
("cuda", 8): [[-3.7039, -5.1923, -6.1787], [-4.0106, -9.3452, -5.2045], [-4.1285, -4.7468, -5.8641]],
}
)
expected_logits = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, atol=1e-4, rtol=1e-4)
expectations = Expectations(
{
(None, None): [[0.2582, 0.5497, 0.4764], [0.1684, 0.1985, 0.2120], [0.7665, 0.4146, 0.4669]],
}
)
expected_boxes = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3], expected_logits, atol=2e-4, rtol=2e-4)
expected_shape_boxes = torch.Size((1, 300, 4))
self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4, rtol=1e-4)
torch.testing.assert_close(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=2e-4, rtol=2e-4)
# verify postprocessing
results = image_processor.post_process_object_detection(
outputs, threshold=0.0, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.9652, 0.9599, 0.9462, 0.8613], device=torch_device)
expected_labels = [15, 15, 65, 57]
expected_slice_boxes = torch.tensor(
[
[3.4114e02, 2.5111e01, 6.3998e02, 3.7289e02],
[1.2780e01, 5.6346e01, 3.1767e02, 4.7134e02],
[3.9959e01, 7.3117e01, 1.7565e02, 1.1744e02],
[-1.0521e-01, 2.9717e00, 6.3989e02, 4.7362e02],
],
device=torch_device,
expectations = Expectations(
{
(None, None): [0.9652, 0.9599, 0.9462, 0.8613],
("cuda", 8): [0.9652, 0.9599, 0.9461, 0.8613],
}
)
self.assertTrue(torch.allclose(results["scores"][:4], expected_scores, atol=1e-3, rtol=1e-4))
expected_scores = torch.tensor(expectations.get_expectation()).to(torch_device)
expected_labels = [15, 15, 65, 57]
expectations = Expectations(
{
(None, None): [
[3.4114e02, 2.5111e01, 6.3998e02, 3.7289e02],
[1.2780e01, 5.6346e01, 3.1767e02, 4.7134e02],
[3.9959e01, 7.3117e01, 1.7565e02, 1.1744e02],
[-1.0521e-01, 2.9717e00, 6.3989e02, 4.7362e02],
],
("cuda", 8): [
[3.4115e02, 2.5109e01, 6.3997e02, 3.7290e02],
[1.2785e01, 5.6350e01, 3.1767e02, 4.7134e02],
[3.9959e01, 7.3117e01, 1.7565e02, 1.1744e02],
[-1.0471e-01, 2.9680e00, 6.3989e02, 4.7362e02],
],
}
)
expected_slice_boxes = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(results["scores"][:4], expected_scores, atol=1e-3, rtol=2e-4)
self.assertSequenceEqual(results["labels"][:4].tolist(), expected_labels)
torch.testing.assert_close(results["boxes"][:4], expected_slice_boxes, atol=1e-3, rtol=1e-4)
torch.testing.assert_close(results["boxes"][:4], expected_slice_boxes, atol=1e-3, rtol=2e-4)

View File

@ -19,7 +19,7 @@ import unittest
import requests
from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline
from transformers.testing_utils import cleanup, require_torch, require_torch_sdpa, slow, torch_device
from transformers.testing_utils import Expectations, cleanup, require_torch, require_torch_sdpa, slow, torch_device
from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -771,9 +771,18 @@ class SamModelIntegrationTest(unittest.TestCase):
with torch.no_grad():
outputs = model(**inputs)
scores = outputs.iou_scores.squeeze().cpu()
masks = outputs.pred_masks[0, 0, 0, 0, :3].cpu()
masks = outputs.pred_masks[0, 0, 0, 0, :3]
expectations = Expectations(
{
(None, None): [-12.7729, -12.3665, -12.6061],
("cuda", 8): [-12.7657, -12.3683, -12.5983],
}
)
expected_masks = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(scores[-1], torch.tensor(0.9566), rtol=2e-4, atol=2e-4)
torch.testing.assert_close(masks, torch.tensor([-12.7729, -12.3665, -12.6061]), rtol=2e-4, atol=2e-4)
torch.testing.assert_close(masks, expected_masks, rtol=2e-4, atol=2e-4)
def test_inference_mask_generation_batched_points_batched_images(self):
model = SamModel.from_pretrained("facebook/sam-vit-base")

View File

@ -27,7 +27,7 @@ from transformers import (
SamHQVisionModel,
pipeline,
)
from transformers.testing_utils import cleanup, require_torch, require_torch_sdpa, slow, torch_device
from transformers.testing_utils import Expectations, cleanup, require_torch, require_torch_sdpa, slow, torch_device
from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -802,9 +802,15 @@ class SamHQModelIntegrationTest(unittest.TestCase):
masks = outputs.pred_masks[0, 0, 0, 0, :3]
self.assertTrue(torch.allclose(scores[0][0][-1], torch.tensor(0.4482), atol=2e-4))
self.assertTrue(
torch.allclose(masks, torch.tensor([-13.1695, -14.6201, -14.8989]).to(torch_device), atol=2e-3)
expectations = Expectations(
{
(None, None): [-13.1695, -14.6201, -14.8989],
("cuda", 8): [-13.1668, -14.6182, -14.8970],
}
)
EXPECTED_MASKS = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(masks, EXPECTED_MASKS, atol=2e-3, rtol=2e-3)
def test_inference_mask_generation_one_point_one_bb(self):
model = SamHQModel.from_pretrained("syscv-community/sam-hq-vit-base")
@ -849,28 +855,53 @@ class SamHQModelIntegrationTest(unittest.TestCase):
with torch.no_grad():
outputs = model(**inputs)
scores = outputs.iou_scores.squeeze().cpu()
masks = outputs.pred_masks[0, 0, 0, 0, :3].cpu()
EXPECTED_SCORES = torch.tensor(
[
[
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
],
[
[0.7598, 0.7388, 0.3110],
[0.9195, 0.8317, 0.6614],
[0.9195, 0.8317, 0.6614],
[0.9195, 0.8317, 0.6614],
],
]
)
EXPECTED_MASKS = torch.tensor([-40.2445, -37.4300, -38.1577])
scores = outputs.iou_scores.squeeze()
masks = outputs.pred_masks[0, 0, 0, 0, :3]
self.assertTrue(torch.allclose(scores, EXPECTED_SCORES, atol=1e-3))
self.assertTrue(torch.allclose(masks, EXPECTED_MASKS, atol=9e-3))
expectations = Expectations(
{
(None, None): [
[
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
],
[
[0.7598, 0.7388, 0.3110],
[0.9195, 0.8317, 0.6614],
[0.9195, 0.8317, 0.6614],
[0.9195, 0.8317, 0.6614],
],
],
("cuda", 8): [
[
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
],
[
[0.7597, 0.7387, 0.3110],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
[0.9195, 0.8316, 0.6614],
],
],
}
)
EXPECTED_SCORES = torch.tensor(expectations.get_expectation()).to(torch_device)
expectations = Expectations(
{
(None, None): [-40.2445, -37.4300, -38.1577],
("cuda", 8): [-40.2351, -37.4334, -38.1526],
}
)
EXPECTED_MASKS = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(scores, EXPECTED_SCORES, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(masks, EXPECTED_MASKS, atol=9e-3, rtol=9e-3)
def test_inference_mask_generation_one_point_one_bb_zero(self):
model = SamHQModel.from_pretrained("syscv-community/sam-hq-vit-base")

View File

@ -16,7 +16,7 @@
import unittest
from transformers import SegformerConfig, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
@ -200,6 +200,9 @@ class SegformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs)
def test_batching_equivalence(self, atol=2e-4, rtol=2e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="SegFormer does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@ -367,14 +370,22 @@ class SegformerModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, model.config.num_labels, 128, 128))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor(
[
[[-4.6310, -5.5232, -6.2356], [-5.1921, -6.1444, -6.5996], [-5.4424, -6.2790, -6.7574]],
[[-12.1391, -13.3122, -13.9554], [-12.8732, -13.9352, -14.3563], [-12.9438, -13.8226, -14.2513]],
[[-12.5134, -13.4686, -14.4915], [-12.8669, -14.4343, -14.7758], [-13.2523, -14.5819, -15.0694]],
]
).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [
[[-4.6310, -5.5232, -6.2356], [-5.1921, -6.1444, -6.5996], [-5.4424, -6.2790, -6.7574]],
[[-12.1391, -13.3122, -13.9554], [-12.8732, -13.9352, -14.3563], [-12.9438, -13.8226, -14.2513]],
[[-12.5134, -13.4686, -14.4915], [-12.8669, -14.4343, -14.7758], [-13.2523, -14.5819, -15.0694]],
],
("cuda", 8): [
[[-4.6310, -5.5232, -6.2361], [-5.1918, -6.1445, -6.5996], [-5.4427, -6.2792, -6.7580]],
[[-12.1397, -13.3124, -13.9551], [-12.8736, -13.9347, -14.3569], [-12.9440, -13.8222, -14.2514]],
[[-12.5135, -13.4682, -14.4913], [-12.8670, -14.4339, -14.7766], [-13.2519, -14.5800, -15.0685]],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_image_segmentation_city(self):
@ -396,13 +407,24 @@ class SegformerModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, model.config.num_labels, 128, 128))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor(
[
[[-13.5748, -13.9111, -12.6500], [-14.3500, -15.3683, -14.2328], [-14.7532, -16.0424, -15.6087]],
[[-17.1651, -15.8725, -12.9653], [-17.2580, -17.3718, -14.8223], [-16.6058, -16.8783, -16.7452]],
[[-3.6456, -3.0209, -1.4203], [-3.0797, -3.1959, -2.0000], [-1.8757, -1.9217, -1.6997]],
]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[[-13.5748, -13.9111, -12.6500], [-14.3500, -15.3683, -14.2328], [-14.7532, -16.0424, -15.6087]],
[[-17.1651, -15.8725, -12.9653], [-17.2580, -17.3718, -14.8223], [-16.6058, -16.8783, -16.7452]],
[[-3.6456, -3.0209, -1.4203], [-3.0797, -3.1959, -2.0000], [-1.8757, -1.9217, -1.6997]],
],
("cuda", 8): [
[[-13.5728, -13.9089, -12.6492], [-14.3478, -15.3656, -14.2309], [-14.7512, -16.0394, -15.6065]],
[[-17.1642, -15.8704, -12.9641], [-17.2572, -17.3701, -14.8214], [-16.6043, -16.8761, -16.7425]],
[[-3.6444, -3.0189, -1.4195], [-3.0787, -3.1953, -1.9993], [-1.8755, -1.9219, -1.7002]],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3, :3, :3], expected_slice, rtol=1e-1, atol=1e-1)
@slow

View File

@ -21,6 +21,7 @@ from datasets import load_dataset
from transformers import SegGptConfig
from transformers.testing_utils import (
Expectations,
require_torch,
require_vision,
slow,
@ -379,15 +380,23 @@ class SegGptModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 3, 896, 448))
self.assertEqual(outputs.pred_masks.shape, expected_shape)
expected_slice = torch.tensor(
[
[[-2.1208, -2.1190, -2.1198], [-2.1237, -2.1228, -2.1227], [-2.1232, -2.1226, -2.1228]],
[[-2.0405, -2.0396, -2.0403], [-2.0434, -2.0434, -2.0433], [-2.0428, -2.0432, -2.0434]],
[[-1.8102, -1.8088, -1.8099], [-1.8131, -1.8126, -1.8129], [-1.8130, -1.8128, -1.8131]],
]
).to(torch_device)
expectations = Expectations(
{
(None, None): [
[[-2.1208, -2.1190, -2.1198], [-2.1237, -2.1228, -2.1227], [-2.1232, -2.1226, -2.1228]],
[[-2.0405, -2.0396, -2.0403], [-2.0434, -2.0434, -2.0433], [-2.0428, -2.0432, -2.0434]],
[[-1.8102, -1.8088, -1.8099], [-1.8131, -1.8126, -1.8129], [-1.8130, -1.8128, -1.8131]],
],
("cuda", 8): [
[[-2.1208, -2.1189, -2.1198], [-2.1236, -2.1229, -2.1230], [-2.1233, -2.1227, -2.1228]],
[[-2.0408, -2.0398, -2.0405], [-2.0435, -2.0437, -2.0438], [-2.0431, -2.0435, -2.0436]],
[[-1.8101, -1.8086, -1.8098], [-1.8129, -1.8126, -1.8130], [-1.8128, -1.8128, -1.8130]],
],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.pred_masks[0, :, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(outputs.pred_masks[0, :, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)
result = image_processor.post_process_semantic_segmentation(outputs, [input_image.size[::-1]])[0]
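
Several hunks in this commit relax rtol and atol from 1e-4 to 2e-4. torch.testing.assert_close treats a pair of elements as close when |actual - expected| <= atol + rtol * |expected|, so doubling both bounds roughly doubles the permitted per-element drift. A worked check with values in the same range as the SegGpt slice above (the numbers are illustrative, not taken from a real run):

import torch

actual = torch.tensor([-2.1208, -2.0405, -1.8102])
expected = torch.tensor([-2.1206, -2.0403, -1.8100])  # hypothetical device-specific reference

# Per-element bound applied by assert_close: atol + rtol * |expected|
bound = 2e-4 + 2e-4 * expected.abs()
print((actual - expected).abs() <= bound)  # tensor([True, True, True])

torch.testing.assert_close(actual, expected, rtol=2e-4, atol=2e-4)  # passes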

View File

@ -16,7 +16,7 @@
import unittest
from transformers import Swin2SRConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -360,7 +360,12 @@ class Swin2SRModelIntegrationTest(unittest.TestCase):
# verify the logits
expected_shape = torch.Size([1, 3, 976, 1296])
self.assertEqual(outputs.reconstruction.shape, expected_shape)
expected_slice = torch.tensor(
[[0.5454, 0.5542, 0.5640], [0.5518, 0.5562, 0.5649], [0.5391, 0.5425, 0.5620]], dtype=model.dtype
).to(torch_device)
torch.testing.assert_close(outputs.reconstruction[0, 0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [[0.5454, 0.5542, 0.5640], [0.5518, 0.5562, 0.5649], [0.5391, 0.5425, 0.5620]],
("cuda", 8): [[0.5454, 0.5547, 0.5640], [0.5522, 0.5562, 0.5649], [0.5391, 0.5425, 0.5620]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device, dtype=model.dtype)
torch.testing.assert_close(outputs.reconstruction[0, 0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -19,6 +19,7 @@ import unittest
from transformers import SwitchTransformersConfig, is_torch_available
from transformers.testing_utils import (
Expectations,
require_tokenizers,
require_torch,
require_torch_accelerator,
@ -1035,18 +1036,28 @@ class SwitchTransformerModelIntegrationTests(unittest.TestCase):
decoder_input_ids = torch.ones((32, 64), dtype=torch.long).to(torch_device)
# fmt: off
EXPECTED_MEAN_LOGITS = torch.Tensor(
[
-0.204102, -0.193359, 0.523438, -0.296875, 0.108887,
0.0211182, 0.605469, -0.100586, -0.0551758, 0.296875,
0.0090332, 0.174805, 0.139648, -0.170898, -0.0981445,
0.0245361, 0.0373535, 0.050293, -0.212891, 0.129883,
0.390625, -0.203125, -0.122559, -0.180664, 0.0437012,
-0.349609, -0.0250244, -0.104004, -0.15918, -0.133789
]
).to(torch.bfloat16)
expectations = Expectations(
{
(None, None): [
-0.204102, -0.193359, 0.523438, -0.296875, 0.108887,
0.0211182, 0.605469, -0.100586, -0.0551758, 0.296875,
0.0090332, 0.174805, 0.139648, -0.170898, -0.0981445,
0.0245361, 0.0373535, 0.050293, -0.212891, 0.129883,
0.390625, -0.203125, -0.122559, -0.180664, 0.0437012,
-0.349609, -0.0250244, -0.104004, -0.15918, -0.133789
],
("cuda", 8): [
-0.2051, -0.1914, 0.5352, -0.2988, 0.1108, 0.0200, 0.6094, -0.1025,
-0.0549, 0.2988, -0.0018, 0.1758, 0.1348, -0.1689, -0.1035, 0.0266,
0.0383, 0.0493, -0.2119, 0.1328, 0.3906, -0.2041, -0.1240, -0.1836,
0.0454, -0.3477, -0.0256, -0.1050, -0.1572, -0.1338
],
}
)
EXPECTED_MEAN_LOGITS = torch.tensor(expectations.get_expectation()).to(torch_device, dtype=torch.bfloat16)
# fmt: on
hf_logits = model(input_ids, decoder_input_ids=decoder_input_ids).last_hidden_state.cpu()
hf_logits = model(input_ids, decoder_input_ids=decoder_input_ids).last_hidden_state
hf_logits = hf_logits[0, 0, :30]
torch.testing.assert_close(hf_logits, EXPECTED_MEAN_LOGITS, rtol=6e-3, atol=9e-3)
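
The SwitchTransformers hunk also keeps the hidden states on the accelerator and casts the reference to bfloat16 via .to(torch_device, dtype=torch.bfloat16). torch.testing.assert_close checks device and dtype by default, so a CPU float32 reference compared against a GPU bfloat16 output would fail before any values are inspected. A small illustration with stand-in values:

import torch

out = torch.tensor([-0.2041, 0.5234], dtype=torch.bfloat16)      # stand-in for model output
ref = torch.tensor([-0.204102, 0.523438], dtype=torch.bfloat16)  # reference cast to the same dtype
torch.testing.assert_close(out, ref, rtol=6e-3, atol=9e-3)       # passes: dtype, device and values match

# Against a float32 reference the same call raises a dtype mismatch
# unless check_dtype=False is passed explicitly:
# torch.testing.assert_close(out, ref.float(), rtol=6e-3, atol=9e-3)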

View File

@ -21,7 +21,7 @@ from huggingface_hub import hf_hub_download
from transformers import TimesformerConfig
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -350,6 +350,11 @@ class TimesformerModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 400))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([-0.3016, -0.7713, -0.4205]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [-0.3016, -0.7713, -0.4205],
("cuda", 8): [-0.3004, -0.7708, -0.4205],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)

View File

@ -18,6 +18,7 @@ import unittest
from transformers import pipeline
from transformers.testing_utils import (
Expectations,
require_bitsandbytes,
require_timm,
require_torch,
@ -304,10 +305,16 @@ class TimmWrapperModelIntegrationTest(unittest.TestCase):
expected_label = 281 # tabby cat
self.assertEqual(torch.argmax(outputs.logits).item(), expected_label)
expected_slice = torch.tensor([-11.2618, -9.6192, -10.3205]).to(torch_device)
expectations = Expectations(
{
(None, None): [-11.2618, -9.6192, -10.3205],
("cuda", 8): [-11.2634, -9.6208, -10.3199],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
resulted_slice = outputs.logits[0, :3]
is_close = torch.allclose(resulted_slice, expected_slice, atol=1e-3)
self.assertTrue(is_close, f"Expected {expected_slice}, but got {resulted_slice}")
torch.testing.assert_close(resulted_slice, expected_slice, atol=1e-3, rtol=1e-3)
@slow
def test_inference_with_pipeline(self):
@ -349,10 +356,16 @@ class TimmWrapperModelIntegrationTest(unittest.TestCase):
expected_label = 281 # tabby cat
self.assertEqual(torch.argmax(outputs.logits).item(), expected_label)
expected_slice = torch.tensor([-2.4043, 1.4492, -0.5127]).to(outputs.logits.dtype)
resulted_slice = outputs.logits[0, :3].cpu()
is_close = torch.allclose(resulted_slice, expected_slice, atol=0.1)
self.assertTrue(is_close, f"Expected {expected_slice}, but got {resulted_slice}")
expectations = Expectations(
{
(None, None): [-2.4043, 1.4492, -0.5127],
("cuda", 8): [-2.2676, 1.5303, -0.4409],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
resulted_slice = outputs.logits[0, :3].to(dtype=torch.float32)
torch.testing.assert_close(resulted_slice, expected_slice, atol=0.1, rtol=0.1)
@slow
def test_transformers_model_for_classification_is_equivalent_to_timm(self):
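
The TimmWrapper hunks drop the manual torch.allclose plus assertTrue pattern in favour of torch.testing.assert_close, which raises an AssertionError that already reports the number of mismatched elements and the greatest absolute and relative differences, so the hand-written failure message becomes redundant. A toy comparison with made-up tensors:

import torch

a = torch.tensor([-11.2618, -9.6192, -10.3205])
b = torch.tensor([-11.2634, -9.6208, -10.3199])

print(torch.allclose(a, b, atol=1e-3))  # only True/False, no diagnostics

try:
    torch.testing.assert_close(a, b, rtol=1e-4, atol=1e-4)
except AssertionError as err:
    print(err)  # reports mismatch count and max absolute/relative difference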

View File

@ -24,6 +24,7 @@ from pytest import mark
from transformers import VideoMAEConfig
from transformers.models.auto import get_values
from transformers.testing_utils import (
Expectations,
is_flaky,
require_flash_attn,
require_torch,
@ -442,9 +443,14 @@ class VideoMAEModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 400))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([0.3669, -0.0688, -0.2421]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [0.3669, -0.0688, -0.2421],
("cuda", 8): [0.3668, -0.0690, -0.2421],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_for_pretraining(self):

View File

@ -169,6 +169,9 @@ class VitPoseModelTest(ModelTesterMixin, unittest.TestCase):
self.config_tester.check_config_can_be_init_without_params()
self.config_tester.check_config_arguments_init()
def test_batching_equivalence(self, atol=3e-4, rtol=3e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
@unittest.skip(reason="VitPose does not support input and output embeddings")
def test_model_common_attributes(self):
pass

View File

@ -137,6 +137,9 @@ class VitPoseBackboneModelTest(ModelTesterMixin, unittest.TestCase):
def test_config(self):
self.config_tester.run_common_tests()
def test_batching_equivalence(self, atol=3e-4, rtol=3e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
# TODO: @Pavel
@unittest.skip(reason="currently failing")
def test_initialization(self):

View File

@ -22,7 +22,7 @@ from huggingface_hub import hf_hub_download
from transformers import VivitConfig
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -355,10 +355,14 @@ class VivitModelIntegrationTest(unittest.TestCase):
expected_shape = torch.Size((1, 400))
self.assertEqual(outputs.logits.shape, expected_shape)
# taken from original model
expected_slice = torch.tensor([-0.9498, 2.7971, -1.4049, 0.1024, -1.8353]).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :5], expected_slice, rtol=1e-4, atol=1e-4)
expectations = Expectations(
{
(None, None): [-0.9498, 2.7971, -1.4049, 0.1024, -1.8353],
("cuda", 8): [-0.9502, 2.7967, -1.4046, 0.1027, -1.8345],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(outputs.logits[0, :5], expected_slice, rtol=2e-4, atol=2e-4)
@slow
def test_inference_interpolate_pos_encoding(self):

View File

@ -20,6 +20,7 @@ from datasets import load_dataset
from transformers import Wav2Vec2BertConfig, is_torch_available
from transformers.testing_utils import (
is_flaky,
require_torch,
require_torch_accelerator,
require_torch_fp16,
@ -434,6 +435,10 @@ class Wav2Vec2BertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@is_flaky(description="Get lager difference with A10 and even with the new `5e-4` still flaky")
def test_batching_equivalence(self, atol=5e-4, rtol=5e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
def test_model_with_relative(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative")
self.model_tester.create_and_check_model(*config_and_inputs)

View File

@ -428,8 +428,8 @@ class Wav2Vec2ConformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest
@is_flaky(
description="The `codevector_idx` computed with `argmax()` in `Wav2Vec2ConformerGumbelVectorQuantizer.forward` is not stable."
)
def test_batching_equivalence(self):
super().test_batching_equivalence()
def test_batching_equivalence(self, atol=1e-4, rtol=1e-4):
super().test_batching_equivalence(atol=atol, rtol=rtol)
def test_model_with_relative(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative")

View File

@ -22,7 +22,14 @@ import numpy as np
from huggingface_hub import hf_hub_download
from transformers import XCLIPConfig, XCLIPTextConfig, XCLIPVisionConfig
from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device
from transformers.testing_utils import (
Expectations,
require_torch,
require_torch_multi_gpu,
require_vision,
slow,
torch_device,
)
from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@ -751,10 +758,13 @@ class XCLIPModelIntegrationTest(unittest.TestCase):
self.assertEqual(outputs.vision_model_output.last_hidden_state.shape, expected_shape)
expected_slice = torch.tensor(
[[0.0126, 0.2109, 0.0609], [0.0448, 0.5862, -0.1688], [-0.0881, 0.8525, -0.3044]]
).to(torch_device)
torch.testing.assert_close(
outputs.vision_model_output.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4
expectations = Expectations(
{
(None, None): [[0.0126, 0.2109, 0.0609], [0.0448, 0.5862, -0.1688], [-0.0881, 0.8525, -0.3044]],
("cuda", 8): [[0.0141, 0.2114, 0.0599], [0.0446, 0.5866, -0.1674], [-0.0876, 0.8592, -0.3025]],
}
)
expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device)
torch.testing.assert_close(
outputs.vision_model_output.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4
)

View File

@ -662,6 +662,11 @@ class TrainerIntegrationCommon:
metrics = trainer.evaluate()
self.assertEqual(metrics[metric], best_value)
def remove_nan_logs(self, log):
for key in list(log.keys()):
if log[key] != log[key]: # Check if the value is NaN
del log[key]
def check_trainer_state_are_the_same(self, trainer_state, trainer_state1):
# We'll pop things so operate on copies.
state = trainer_state.copy()
@ -675,6 +680,10 @@ class TrainerIntegrationCommon:
for key in skip_log_keys:
_ = log.pop(key, None)
_ = log1.pop(key, None)
self.remove_nan_logs(log)
self.remove_nan_logs(log1)
self.assertEqual(log, log1)
def convert_to_sharded_checkpoint(self, folder, save_safe=True, load_safe=True):
@ -3174,6 +3183,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
self.assertAlmostEqual(b, b1, delta=1e-5)
@slow
@require_non_hpu
@require_accelerate
@require_torch_non_multi_accelerator
def test_auto_batch_size_finder(self):
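
The new remove_nan_logs helper relies on NaN being the only float value that is not equal to itself, so any log entry whose value is NaN is dropped from both trainer states before they are compared. A standalone illustration of that check (not part of the diff):

import math

log = {"loss": 1.25, "grad_norm": float("nan")}
for key in list(log.keys()):
    if log[key] != log[key]:  # true only for NaN
        del log[key]

assert log == {"loss": 1.25}
assert math.isnan(float("nan"))  # the more explicit spelling of the same test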

View File

@ -62,4 +62,5 @@ if __name__ == "__main__":
start = end
end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
model_splits.append(d[start:end])
print(model_splits)
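
The final hunk only adds the closing print(model_splits); the surrounding loop distributes the collected test folders across args.num_splits chunks, handing one extra folder to each of the first num_jobs % num_splits chunks. A self-contained sketch of that distribution, using placeholder folder names rather than the script's real inputs:

d = [f"tests/models/model_{i}" for i in range(10)]  # stand-in for the collected test dirs
num_splits = 3
num_jobs = len(d)
num_jobs_per_splits = num_jobs // num_splits

model_splits = []
end = 0
for idx in range(num_splits):
    start = end
    end = start + num_jobs_per_splits + (1 if idx < num_jobs % num_splits else 0)
    model_splits.append(d[start:end])

print(model_splits)  # three chunks of sizes 4, 3 and 3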