From c25f27fa6a2c04fb344a55f817b4976dd823c0c9 Mon Sep 17 00:00:00 2001
From: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
Date: Wed, 7 Sep 2022 12:24:12 +0200
Subject: [PATCH] [VideoMAE] Improve code examples (#18919)

* Simplify code example

* Add seed
---
 .../models/videomae/modeling_videomae.py | 29 +++++++++----------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/transformers/models/videomae/modeling_videomae.py b/src/transformers/models/videomae/modeling_videomae.py
index a807ed7208f..7efff490d8c 100644
--- a/src/transformers/models/videomae/modeling_videomae.py
+++ b/src/transformers/models/videomae/modeling_videomae.py
@@ -598,21 +598,18 @@ class VideoMAEModel(VideoMAEPreTrainedModel):
         >>> file_path = hf_hub_download(
         ...     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
         ... )
-        >>> vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+        >>> videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
 
         >>> # sample 16 frames
-        >>> vr.seek(0)
-        >>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(vr))
-        >>> buffer = vr.get_batch(indices).asnumpy()
-
-        >>> # create a list of NumPy arrays
-        >>> video = [buffer[i] for i in range(buffer.shape[0])]
+        >>> videoreader.seek(0)
+        >>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(videoreader))
+        >>> video = videoreader.get_batch(indices).asnumpy()
 
         >>> feature_extractor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base")
         >>> model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
 
         >>> # prepare video for the model
-        >>> inputs = feature_extractor(video, return_tensors="pt")
+        >>> inputs = feature_extractor(list(video), return_tensors="pt")
 
         >>> # forward pass
         >>> outputs = model(**inputs)
@@ -943,10 +940,13 @@ class VideoMAEForVideoClassification(VideoMAEPreTrainedModel):
         ```python
         >>> from decord import VideoReader, cpu
         >>> import torch
+        >>> import numpy as np
 
         >>> from transformers import VideoMAEFeatureExtractor, VideoMAEForVideoClassification
         >>> from huggingface_hub import hf_hub_download
 
+        >>> np.random.seed(0)
+
 
         >>> def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
         ...     converted_len = int(clip_len * frame_sample_rate)
@@ -961,20 +961,17 @@
         >>> file_path = hf_hub_download(
         ...     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
         ... )
-        >>> vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+        >>> videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
 
         >>> # sample 16 frames
-        >>> vr.seek(0)
-        >>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(vr))
-        >>> buffer = vr.get_batch(indices).asnumpy()
-
-        >>> # create a list of NumPy arrays
-        >>> video = [buffer[i] for i in range(buffer.shape[0])]
+        >>> videoreader.seek(0)
+        >>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(videoreader))
+        >>> video = videoreader.get_batch(indices).asnumpy()
 
         >>> feature_extractor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
         >>> model = VideoMAEForVideoClassification.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
 
-        >>> inputs = feature_extractor(video, return_tensors="pt")
+        >>> inputs = feature_extractor(list(video), return_tensors="pt")
 
         >>> with torch.no_grad():
         ...     outputs = model(**inputs)
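
For convenience, here is the patched classification example assembled into a single runnable script. This is a reviewer sketch, not part of the patch: it assumes `decord` is installed, the body of `sample_frame_indices` beyond its first statement is an assumption (the diff only shows `converted_len = int(clip_len * frame_sample_rate)` as context), and the final two prediction lines are added for illustration.

```python
# End-to-end sketch of the patched VideoMAEForVideoClassification example.
from decord import VideoReader, cpu
import torch
import numpy as np

from transformers import VideoMAEFeatureExtractor, VideoMAEForVideoClassification
from huggingface_hub import hf_hub_download

np.random.seed(0)  # added by the patch so the randomly sampled clip is reproducible


def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    # Pick a random window of clip_len * frame_sample_rate frames and spread
    # clip_len indices evenly across it (assumed implementation; the diff only
    # shows the first line of this helper).
    converted_len = int(clip_len * frame_sample_rate)
    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    return np.clip(indices, start_idx, end_idx - 1).astype(np.int64)


file_path = hf_hub_download(
    repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
)
videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))

# sample 16 frames as a single (16, height, width, 3) NumPy array
videoreader.seek(0)
indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(videoreader))
video = videoreader.get_batch(indices).asnumpy()

feature_extractor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
model = VideoMAEForVideoClassification.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")

# the feature extractor expects a sequence of frames, hence list(video)
inputs = feature_extractor(list(video), return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# illustration only: map the top logit to a Kinetics label
predicted_class = outputs.logits.argmax(-1).item()
print(model.config.id2label[predicted_class])
```

On the simplification itself: `VideoReader.get_batch(indices).asnumpy()` returns one `(num_frames, height, width, 3)` array, and iterating over its first axis yields per-frame arrays, so `list(video)` replaces the old explicit list comprehension over `buffer`. Seeding NumPy makes the randomly chosen clip window deterministic, which keeps the documented output stable across runs.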