Remove benchmark.py after #34275

This commit is contained in:
Arthur Zucker 2025-01-10 11:09:06 +01:00
parent e0646f3dce
commit a9bd1e6284

View File

@ -1,132 +0,0 @@
import os
import time
import cv2
import av
import numpy as np
from numba import jit, cuda
from decord import VideoReader, cpu, gpu
import torch
from torchvision import io
video_dir = "/raid/raushan/temp_dir/"
NUM_FRAMES = 32
# @jit(nopython=True, target_backend='cuda') # <-- If you have a cuda GPU
def process_video_cv2(video: cv2.VideoCapture, indices: np.array, length: int):
index = 0
frames = []
while video.isOpened():
success, frame = video.read()
if index in indices:
# Channel 0:B 1:G 2:R
height, width, channel = frame.shape
frames.append(frame[0:height, 0:width, 0:channel])
if success:
index += 1
if index >= length:
break
video.release()
return frames
def read_video_opencv(video_path, num_frames=NUM_FRAMES):
'''
Decode the video with open-cv decoder.
Args:
video_path (str): Path to the video file.
num_frames (int): Number of frames to sample uniformly. Defaults to NUM_FRAMES
Returns:
np.ndarray: np array of decoded frames of shape (num_frames, height, width, 3).
'''
video = cv2.VideoCapture(video_path)
fps = int(video.get(cv2.CAP_PROP_FPS))
total_num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype(int)
frames = process_video_cv2(video, indices, total_num_frames)
return np.stack(frames)
def read_video_decord(video_path, num_frames=NUM_FRAMES):
'''
Decode the video with Decord decoder.
Args:
video_path (str): Path to the video file.
num_frames (int): Number of frames to sample uniformly. Defaults to NUM_FRAMES
Returns:
np.ndarray: np array of decoded frames of shape (num_frames, height, width, 3).
'''
vr = VideoReader(uri=video_path, ctx=cpu(0)) # you need to install from source to use gpu ctx
indices = np.arange(0, len(vr), len(vr) / num_frames).astype(int)
frames = vr.get_batch(indices).asnumpy()
return frames
def read_video_pyav(video_path, num_frames=NUM_FRAMES):
'''
Decode the video with PyAV decoder.
Args:
video_path (str): Path to the video file.
num_frames (int): Number of frames to sample uniformly. Defaults to NUM_FRAMES
Returns:
np.ndarray: np array of decoded frames of shape (num_frames, height, width, 3).
'''
container = av.open(video_path)
# sample uniformly "num_frames" frames from the video
total_frames = container.streams.video[0].frames
indices = np.arange(0, total_frames, total_frames / num_frames).astype(int)
frames = []
container.seek(0)
start_index = indices[0]
end_index = indices[-1]
for i, frame in enumerate(container.decode(video=0)):
if i > end_index:
break
if i >= start_index and i in indices:
frames.append(frame)
return np.stack([x.to_ndarray(format="rgb24") for x in frames])
def read_video_torchvision(video_path, num_frames=NUM_FRAMES):
video, _, info = io.read_video(
video_path,
start_pts=0.0,
end_pts=None,
pts_unit="sec",
output_format="TCHW",
)
idx = torch.linspace(0, video.size(0) - 1, num_frames, dtype=torch.int64)
return video[idx]
decoders = {"decord": read_video_decord, "opencv": read_video_opencv, "av": read_video_pyav, "torchvision": read_video_torchvision}
for name, fn in decoders.items():
start = time.perf_counter()
for video_file in os.listdir(video_dir):
path = f"{video_dir}/{video_file}"
output = fn(path)
end = time.perf_counter()
print(f"Time taken for {name}: {(end-start):.04f} sec")
# Time taken for decord: 475.2979 sec
# Time taken for opencv: 614.6062 sec
# Time taken for av: 1067.0860 sec
# Time taken for torchvision: 1924.0433 sec