Mirror of https://github.com/huggingface/transformers.git
Synced 2025-07-16 19:18:24 +06:00
Fix Aria tests (#37444)
* update aria tests
* add cuda tests
* check outputs for cpu, cuda, and xpu
* check output for each device
* fix style
* fix xpu output
* add comments and use assertListEqual
* rm pad token assign

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Parent: b6d65e40b2
Commit: b7f7aa78a0
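In short: the test now loads the 4-bit model with multihead_attn left un-quantized, and checks the decoded generations against per-device references, since the cpu, cuda, and xpu backends can produce slightly different continuations. A minimal sketch of the device-keyed lookup the test now relies on (placeholder strings, not the real expected outputs):

    # Hypothetical placeholder values; the real test stores full decoded prompts.
    import torch

    EXPECTED_OUTPUT = {
        "cpu": ["decoded text as produced on cpu"],
        "cuda": ["decoded text as produced on cuda"],
        "xpu": ["decoded text as produced on xpu"],
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model.device.type in the test plays the same role as device.type here.
    print(EXPECTED_OUTPUT[device.type])

The same "cpu"/"cuda"/"xpu" strings come out of model.device.type, which is what the updated assertion indexes with.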
@@ -420,8 +420,11 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
     @require_vision
     @require_bitsandbytes
     def test_batched_generation(self):
-        model = AriaForConditionalGeneration.from_pretrained("rhymes-ai/Aria", load_in_4bit=True)
+        # Skip multihead_attn for 4bit because MHA will read the original weight without dequantizing it.
+        # See https://github.com/huggingface/transformers/pull/37444#discussion_r2045852538.
+        model = AriaForConditionalGeneration.from_pretrained(
+            "rhymes-ai/Aria", load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]
+        )
         processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")

         prompt1 = "<image>\n<image>\nUSER: What's the difference of two images?\nASSISTANT:"
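The shorthand above passes the bitsandbytes options directly to from_pretrained; spelled out as an explicit config it would look roughly like this (a sketch, assuming the standard bitsandbytes integration; llm_int8_skip_modules also governs which modules are left un-quantized under 4-bit loading):

    from transformers import AriaForConditionalGeneration, BitsAndBytesConfig

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        # Keep MHA in full precision: it reads the weight tensor directly,
        # so a 4-bit-packed weight would never be dequantized (see the PR
        # discussion linked in the diff above).
        llm_int8_skip_modules=["multihead_attn"],
    )
    model = AriaForConditionalGeneration.from_pretrained(
        "rhymes-ai/Aria", quantization_config=quantization_config
    )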
@@ -432,24 +435,49 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
         image1 = Image.open(requests.get(url1, stream=True).raw)
         image2 = Image.open(requests.get(url2, stream=True).raw)

-        inputs = processor(
-            images=[image1, image2, image1, image2],
-            text=[prompt1, prompt2, prompt3],
-            return_tensors="pt",
-            padding=True,
-        ).to(torch_device)
-
-        model = model.eval()
-
-        EXPECTED_OUTPUT = [
-            "\n \nUSER: What's the difference of two images?\nASSISTANT: The difference between the two images is that one shows a dog standing on a grassy field, while",
-            "\nUSER: Describe the image.\nASSISTANT: The image features a brown and white dog sitting on a sidewalk. The dog is holding a small",
-            "\nUSER: Describe the image.\nASSISTANT: The image features a lone llama standing on a grassy hill. The llama is the",
-        ]
+        # Create inputs
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image"},
+                    {"type": "text", "text": prompt1},
+                    {"type": "image"},
+                    {"type": "text", "text": prompt2},
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image"},
+                    {"type": "text", "text": prompt3},
+                ],
+            },
+        ]
+
+        prompts = [processor.apply_chat_template([message], add_generation_prompt=True) for message in messages]
+        images = [[image1, image2], [image2]]
+        inputs = processor(text=prompts, images=images, padding=True, return_tensors="pt").to(
+            device=model.device, dtype=model.dtype
+        )
+
+        EXPECTED_OUTPUT = {
+            "cpu": [
+                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
+            ],  # cpu output
+            "cuda": [
+                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a patch of ground with some dry grass. The",
+            ],  # cuda output
+            "xpu": [
+                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n <image>\n USER: What's the difference of two images?\n ASSISTANT:<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The first image features a cute, light-colored puppy sitting on a paved surface with",
+                "<|im_start|>user\n<fim_prefix><fim_suffix> <image>\n USER: Describe the image.\n ASSISTANT:<|im_end|>\n <|im_start|>assistant\n The image shows a young alpaca standing on a grassy hill. The alpaca has",
+            ],  # xpu output
+        }
         generate_ids = model.generate(**inputs, max_new_tokens=20)
         outputs = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
-        self.assertEqual(outputs, EXPECTED_OUTPUT)
+        self.assertListEqual(outputs, EXPECTED_OUTPUT[model.device.type])

     def test_tokenizer_integration(self):
         model_id = "rhymes-ai/Aria"
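Two details of the new input pipeline are worth noting: the nested images list pairs two images with the first conversation and one with the second, and .to(device=model.device, dtype=model.dtype) moves the batch to wherever the quantized model landed while also casting pixel values to its dtype. A minimal sketch of the chat-template step on its own (assumes Hub access for the Aria processor; the conversation text is illustrative):

    from transformers import AutoProcessor

    processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Describe the image."},
            ],
        }
    ]
    # Renders the <|im_start|>user ... <|im_end|> markup seen in EXPECTED_OUTPUT,
    # ending with the assistant header so generation continues from there.
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    print(prompt)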