enable test_assisted_decoding_in_different_gpu test on XPU (#37120)

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
Yao Matrix 2025-04-01 17:22:59 +08:00 committed by GitHub
parent 737cbd2109
commit 8f6b27eb5c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -3748,11 +3748,13 @@ class GenerationIntegrationTests(unittest.TestCase):
self.assertTrue(y_prob <= 1.0 and n_prob <= 1.0)
@slow
@require_torch_multi_gpu
@require_torch_multi_accelerator
def test_assisted_decoding_in_different_gpu(self):
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to("cuda:0")
device_0 = f"{torch_device}:0" if torch_device != "cpu" else "cpu"
device_1 = f"{torch_device}:1" if torch_device != "cpu" else "cpu"
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(device_0)
assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
"cuda:1"
device_1
)
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
model.config.pad_token_id = tokenizer.eos_token_id