From 5d26a387359d669d74f14effbdc859f907133647 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 19 Jun 2025 13:50:33 +0200 Subject: [PATCH] Fix `FalconMambaIntegrationTests` (#38566) * update * update * update * update * update * update * update * update * update * update * update * update * update * update --------- Co-authored-by: ydshieh --- .../test_modeling_falcon_mamba.py | 64 +++++++++++++++---- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py index ef906951a49..e59787fb8c6 100644 --- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py +++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py @@ -19,9 +19,12 @@ from unittest.util import safe_repr from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, FalconMambaConfig, is_torch_available from transformers.testing_utils import ( + Expectations, + cleanup, require_bitsandbytes, require_torch, require_torch_accelerator, + require_torch_large_accelerator, require_torch_multi_accelerator, require_torch_multi_gpu, slow, @@ -450,15 +453,30 @@ class FalconMambaIntegrationTests(unittest.TestCase): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.text = "Hello today" - def test_generation_bf16(self): - model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16, device_map="auto") + cleanup(torch_device, gc_collect=True) + + def tearDown(self): + cleanup(torch_device, gc_collect=True) + + # On T4, get `NotImplementedError: Cannot copy out of meta tensor; no data!` + @require_torch_large_accelerator + def test_generation_fp16(self): + model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16, device_map="auto") inputs = self.tokenizer(self.text, return_tensors="pt").to(torch_device) out = model.generate(**inputs, max_new_tokens=20, do_sample=False) + EXPECTED_OUTPUTS = Expectations( + { + ("cuda", 7): "Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep", + ("cuda", 8): 'Hello today Iava,\n\nI am writing to you today to discuss the importance of maintaining a healthy lifestyle', + } + ) # fmt: skip + EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation() + self.assertEqual( self.tokenizer.batch_decode(out, skip_special_tokens=False)[0], - "Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep", + EXPECTED_OUTPUT, ) @require_bitsandbytes @@ -471,11 +489,11 @@ class FalconMambaIntegrationTests(unittest.TestCase): self.assertEqual( self.tokenizer.batch_decode(out, skip_special_tokens=False)[0], - """Hello today I'm going to talk about the "C" in the "C-I-""", + "Hello today Iava,\n\nI'm sorry to hear that you're having trouble with the ", ) def test_generation_torch_compile(self): - model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16).to(torch_device) + model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16).to(torch_device) model = torch.compile(model) inputs = self.tokenizer(self.text, return_tensors="pt").to(torch_device) @@ -483,7 +501,7 @@ class FalconMambaIntegrationTests(unittest.TestCase): self.assertEqual( self.tokenizer.batch_decode(out, skip_special_tokens=False)[0], - "Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep", + "Hello today Iava,\n\nI am writing to you today to discuss the importance of maintaining a healthy lifestyle", ) def test_batched_generation(self): @@ -493,13 +511,22 @@ class FalconMambaIntegrationTests(unittest.TestCase): texts = ["Hello today", "Hello my name is Younes and today"] - EXPECTED_OUTPUT = [ - "Hello today I'm going to show you how to make a 3D model of a house.\n", - "Hello my name is Younes and today I will be talking about the topic of “The importance of the internet in our life”.\n", - ] + EXPECTED_OUTPUTS = Expectations( + { + ("cuda", 7): [ + 'Hello today I will be talking about the “Theory of Relativity” by Albert Einstein.\nThe', + 'Hello my name is Younes and today I will be talking about the importance of the internet in our lives.\nThe internet is a global', + ], + ("cuda", 8): [ + 'Hello today I am going to talk about the “Theory of Relativity” by Albert Einstein.\n', + 'Hello my name is Younes and today I will be talking about the importance of the internet in our lives.\nThe internet is a global', + ], + } + ) # fmt: skip + EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation() inputs = tok(texts, return_tensors="pt", padding=True, return_token_type_ids=False).to(torch_device) - model = AutoModelForCausalLM.from_pretrained(model_id, device_map=0, torch_dtype=torch.bfloat16) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map=0, torch_dtype=torch.float16) out = model.generate(**inputs, max_new_tokens=20) out = tok.batch_decode(out, skip_special_tokens=True) @@ -514,6 +541,19 @@ class FalconMambaIntegrationTests(unittest.TestCase): out = model.generate(**inputs, max_new_tokens=20) out = tok.batch_decode(out, skip_special_tokens=True) + EXPECTED_OUTPUTS = Expectations( + { + ("cuda", 7): [ + ' I will be talking about the “Theory of Relativity” by Albert Einstein.\nThe', + ' I will be talking about the importance of the internet in our lives.\nThe internet is a global', + ], + ("cuda", 8): [ + ' I am going to talk about the “Theory of Relativity” by Albert Einstein.\n', + ' I will be talking about the importance of the internet in our lives.\nThe internet is a global' + ], + } + ) # fmt: skip + EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation() self.assertListEqual(out, EXPECTED_OUTPUT) @require_torch_multi_accelerator @@ -521,7 +561,7 @@ class FalconMambaIntegrationTests(unittest.TestCase): model_id = "tiiuae/falcon-mamba-7b" tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16) tokenizer.pad_token_id = tokenizer.eos_token_id text = "Hello today"