OWLv2: bug fix in post_process_object_detection() when using cuda device (#27468)

* OWLv2: bug fix in post_process_object_detection() when using cuda device
* fix copies issue by fixing original function in owlvit
2025-07-31 02:02:21 +06:00 · 2023-11-13 17:31:44 +02:00 · 2023-11-13 17:31:44 +02:00 · 20abdacbef
commit 20abdacbef
parent 68ae3be7f5
2 changed files with 3 additions and 3 deletions
--- a/src/transformers/models/owlv2/image_processing_owlv2.py
+++ b/src/transformers/models/owlv2/image_processing_owlv2.py
@@ -504,7 +504,7 @@ class Owlv2ImageProcessor(BaseImageProcessor):
            else:
                img_h, img_w = target_sizes.unbind(1)

-            scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
+            scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(boxes.device)
            boxes = boxes * scale_fct[:, None, :]

        results = []
--- a/src/transformers/models/owlvit/image_processing_owlvit.py
+++ b/src/transformers/models/owlvit/image_processing_owlvit.py
@@ -448,7 +448,7 @@ class OwlViTImageProcessor(BaseImageProcessor):

        # Convert from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
-        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
+        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(boxes.device)
        boxes = boxes * scale_fct[:, None, :]

        results = [{"scores": s, "labels": l, "boxes": b} for s, l, b in zip(scores, labels, boxes)]
@@ -498,7 +498,7 @@ class OwlViTImageProcessor(BaseImageProcessor):
            else:
                img_h, img_w = target_sizes.unbind(1)

-            scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
+            scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(boxes.device)
            boxes = boxes * scale_fct[:, None, :]

        results = []