Fix the seamless_m4t cannot work on Gaudi (#38363)

* Fix the seamless_m4t cannot work on Gaudi Signed-off-by: yuanwu <yuan.wu@intel.com> * Refine the patch Signed-off-by: yuanwu <yuan.wu@intel.com> * Fix seamless_m4t_v2 crash Signed-off-by: yuanwu <yuan.wu@intel.com> * Use the patched_gather Signed-off-by: yuanwu <yuan.wu@intel.com> * Remove debug logs Signed-off-by: yuanwu <yuan.wu@intel.com> * Remove useless modifications Signed-off-by: yuanwu <yuan.wu@intel.com> * Add hpu check Signed-off-by: yuanwu <yuan.wu@intel.com> * Add comments Signed-off-by: yuanwu <yuan.wu@intel.com> --------- Signed-off-by: yuanwu <yuan.wu@intel.com> Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
2025-08-02 11:11:05 +06:00 · 2025-06-25 18:40:01 +08:00 · 2025-06-25 18:40:01 +08:00 · de98fb25a3
commit de98fb25a3
parent 7503cb9113
1 changed files with 22 additions and 0 deletions
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@ -851,6 +851,28 @@ def is_torch_hpu_available():

        torch.Tensor.masked_fill_ = patched_masked_fill_

+    # We patch torch.Tensor.gather for int64 tensors to avoid a bug on Gaudi:
+    # graph compilation fails with "synStatus 26 [Generic failure]".
+    # This can be removed once the bug is fixed, but for now we need it.
+    original_gather = torch.Tensor.gather
+
+    def patched_gather(input: torch.Tensor, dim: int, index: torch.LongTensor) -> torch.Tensor:
+        if input.dtype == torch.int64 and input.device.type == "hpu":
+            logger.warning_once(
+                "torch.gather is not supported for int64 tensors on Gaudi. "
+                "This operation will be performed patched_gather using indexing."
+            )
+
+            idx = [torch.arange(size, device=input.device, dtype=input.dtype) for size in input.shape]
+            idx[dim] = index
+            idx = tuple(idx)
+            output = input[idx]
+            return output
+        else:
+            return original_gather(input, dim, index)
+
+    torch.Tensor.gather = patched_gather
+
    # IlyasMoutawwakil: we patch torch.compile to use the HPU backend by default
    # https://github.com/huggingface/transformers/pull/38790#discussion_r2157043944
    # This is necessary for cases where torch.compile is used as a decorator (defaulting to inductor)