Fix Qwen2.5-VL Video Processor (#38366)
Some checks are pending
Self-hosted runner (benchmark) / Benchmark (aws-g5-4xlarge-cache) (push) Waiting to run
Build documentation / build (push) Waiting to run
New model PR merged notification / Notify new model (push) Waiting to run
Slow tests on important models (on Push - A10) / Get all modified files (push) Waiting to run
Slow tests on important models (on Push - A10) / Slow & FA2 tests (push) Blocked by required conditions
Self-hosted runner (push-caller) / Check if setup was changed (push) Waiting to run
Self-hosted runner (push-caller) / build-docker-containers (push) Blocked by required conditions
Self-hosted runner (push-caller) / Trigger Push CI (push) Blocked by required conditions
Secret Leaks / trufflehog (push) Waiting to run
Update Transformers metadata / build_and_package (push) Waiting to run

* Update processing_qwen2_5_vl.py

* Update processing_qwen2_5_vl.py

* Update modular_qwen2_5_vl.py

* Fix CI

* Update modular_qwen2_5_vl.py

* Update processing_qwen2_5_vl.py

* Update video_processing_utils.py
This commit is contained in:
Ye Liu 2025-05-27 19:46:37 +08:00 committed by GitHub
parent 80902ae9b1
commit 10ae443ec0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 3 deletions

View File

@@ -1013,10 +1013,12 @@ class Qwen2_5_VLProcessor(Qwen2VLProcessor):
image_grid_thw = image_inputs["image_grid_thw"] image_grid_thw = image_inputs["image_grid_thw"]
if videos is not None: if videos is not None:
# pop fps in advance for passing kwargs validation
fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
videos_inputs = self.video_processor(videos=videos, **output_kwargs["videos_kwargs"]) videos_inputs = self.video_processor(videos=videos, **output_kwargs["videos_kwargs"])
video_grid_thw = videos_inputs["video_grid_thw"] video_grid_thw = videos_inputs["video_grid_thw"]
fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
if isinstance(fps, (int, float)): if isinstance(fps, (int, float)):
second_per_grid_ts = [self.video_processor.temporal_patch_size / fps] * len(video_grid_thw) second_per_grid_ts = [self.video_processor.temporal_patch_size / fps] * len(video_grid_thw)
elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw): elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw):

View File

@@ -152,10 +152,12 @@ class Qwen2_5_VLProcessor(ProcessorMixin):
image_grid_thw = image_inputs["image_grid_thw"] image_grid_thw = image_inputs["image_grid_thw"]
if videos is not None: if videos is not None:
# pop fps in advance for passing kwargs validation
fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
videos_inputs = self.video_processor(videos=videos, **output_kwargs["videos_kwargs"]) videos_inputs = self.video_processor(videos=videos, **output_kwargs["videos_kwargs"])
video_grid_thw = videos_inputs["video_grid_thw"] video_grid_thw = videos_inputs["video_grid_thw"]
fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
if isinstance(fps, (int, float)): if isinstance(fps, (int, float)):
second_per_grid_ts = [self.video_processor.temporal_patch_size / fps] * len(video_grid_thw) second_per_grid_ts = [self.video_processor.temporal_patch_size / fps] * len(video_grid_thw)
elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw): elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw):

View File

@@ -250,7 +250,10 @@ class BaseVideoProcessor(BaseImageProcessorFast):
videos: VideoInput, videos: VideoInput,
**kwargs: Unpack[VideosKwargs], **kwargs: Unpack[VideosKwargs],
) -> BatchFeature: ) -> BatchFeature:
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self.valid_kwargs.__annotations__.keys()) validate_kwargs(
captured_kwargs=kwargs.keys(),
valid_processor_keys=list(self.valid_kwargs.__annotations__.keys()) + ["return_tensors"],
)
# Set default kwargs from self. This ensures that if a kwarg is not provided # Set default kwargs from self. This ensures that if a kwarg is not provided
# by the user, it gets its default value from the instance, or is set to None. # by the user, it gets its default value from the instance, or is set to None.
for kwarg_name in self.valid_kwargs.__annotations__: for kwarg_name in self.valid_kwargs.__annotations__: