Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-03 21:00:08 +06:00
Fix Qwen2.5-VL Video Processor (#38366)
* Update processing_qwen2_5_vl.py
* Update processing_qwen2_5_vl.py
* Update modular_qwen2_5_vl.py
* Fix CI
* Update modular_qwen2_5_vl.py
* Update processing_qwen2_5_vl.py
* Update video_processing_utils.py
parent 80902ae9b1
commit 10ae443ec0
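The fix has two parts, as the diffs below show: the Qwen2.5-VL processor now pops "fps" out of output_kwargs["videos_kwargs"] before calling the video processor, so the frame rate is no longer forwarded as an unrecognized kwarg, and BaseVideoProcessor now accepts "return_tensors" during kwargs validation.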
modular_qwen2_5_vl.py
@@ -1013,10 +1013,12 @@ class Qwen2_5_VLProcessor(Qwen2VLProcessor):
             image_grid_thw = image_inputs["image_grid_thw"]
 
         if videos is not None:
+            # pop fps in advance for passing kwargs validation
+            fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
+
             videos_inputs = self.video_processor(videos=videos, **output_kwargs["videos_kwargs"])
             video_grid_thw = videos_inputs["video_grid_thw"]
 
-            fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
             if isinstance(fps, (int, float)):
                 second_per_grid_ts = [self.video_processor.temporal_patch_size / fps] * len(video_grid_thw)
             elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw):
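In the old ordering, "fps" was still inside videos_kwargs when the video processor was called, so the processor's kwargs validation flagged it; popping it first keeps the forwarded kwargs clean while the value stays available for the second_per_grid_ts computation further down. A minimal, self-contained sketch of the pattern; validate_kwargs and fake_video_processor here are simplified stand-ins, not the library implementations:

def validate_kwargs(captured_kwargs, valid_processor_keys):
    # Simplified stand-in for transformers' validator, which warns
    # about any kwarg it does not recognize.
    unused = set(captured_kwargs) - set(valid_processor_keys)
    if unused:
        print(f"Unused or unrecognized kwargs: {sorted(unused)}.")

def fake_video_processor(videos, **kwargs):
    # Hypothetical processor that only accepts "size" and "do_resize".
    validate_kwargs(kwargs.keys(), valid_processor_keys=["size", "do_resize"])
    return {"video_grid_thw": [(4, 16, 16)] * len(videos)}

videos_kwargs = {"size": {"shortest_edge": 224}, "fps": 24}

# Before the fix: "fps" reaches the processor and trips validation.
fake_video_processor(videos=["clip"], **videos_kwargs)

# After the fix: pop "fps" first, then forward the remaining kwargs.
fps = videos_kwargs.pop("fps", 2.0)
outputs = fake_video_processor(videos=["clip"], **videos_kwargs)  # validates cleanly

The 2.0 fallback in the sketch mirrors the default the diff uses when the caller supplies no rate.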
processing_qwen2_5_vl.py
@@ -152,10 +152,12 @@ class Qwen2_5_VLProcessor(ProcessorMixin):
             image_grid_thw = image_inputs["image_grid_thw"]
 
         if videos is not None:
+            # pop fps in advance for passing kwargs validation
+            fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
+
             videos_inputs = self.video_processor(videos=videos, **output_kwargs["videos_kwargs"])
             video_grid_thw = videos_inputs["video_grid_thw"]
 
-            fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
             if isinstance(fps, (int, float)):
                 second_per_grid_ts = [self.video_processor.temporal_patch_size / fps] * len(video_grid_thw)
             elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw):
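The same reordering is mirrored here in the generated processing_qwen2_5_vl.py, which is derived from the modular file above. Once popped, fps converts the frame rate into seconds covered by each temporal grid step. A worked example of that arithmetic (temporal_patch_size = 2 is assumed for illustration, and the per-video branch is presumed to divide by each rate in turn, since the diff truncates at the elif):

temporal_patch_size = 2  # assumed here; the real value comes from the video processor config
video_grid_thw = [(8, 24, 24), (4, 24, 24)]  # (t, h, w) grids for two videos

# Scalar fps: one rate shared by every video.
fps = 2.0
second_per_grid_ts = [temporal_patch_size / fps] * len(video_grid_thw)
print(second_per_grid_ts)  # [1.0, 1.0] -> each temporal grid step spans 1 second

# Per-video fps: the length must match len(video_grid_thw); presumably
# each video then gets its own division in the truncated elif branch.
fps = [2.0, 4.0]
second_per_grid_ts = [temporal_patch_size / f for f in fps]
print(second_per_grid_ts)  # [1.0, 0.5]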
video_processing_utils.py
@@ -250,7 +250,10 @@ class BaseVideoProcessor(BaseImageProcessorFast):
         videos: VideoInput,
         **kwargs: Unpack[VideosKwargs],
     ) -> BatchFeature:
-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self.valid_kwargs.__annotations__.keys())
+        validate_kwargs(
+            captured_kwargs=kwargs.keys(),
+            valid_processor_keys=list(self.valid_kwargs.__annotations__.keys()) + ["return_tensors"],
+        )
         # Set default kwargs from self. This ensures that if a kwarg is not provided
         # by the user, it gets its default value from the instance, or is set to None.
         for kwarg_name in self.valid_kwargs.__annotations__:
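This hunk fixes the other half of the problem: return_tensors is a legitimate call-time kwarg, but it is not declared in valid_kwargs.__annotations__, so the old strict check flagged it. A minimal sketch of the before and after behavior, with a simplified stand-in validator and kwargs class (not the library implementations):

from typing import Optional, TypedDict

class VideosKwargs(TypedDict, total=False):
    # Simplified stand-in for the annotated kwargs class in the diff.
    do_resize: Optional[bool]
    fps: Optional[float]

def validate_kwargs(captured_kwargs, valid_processor_keys):
    # Simplified stand-in; the library version logs a warning instead.
    unused = set(captured_kwargs) - set(valid_processor_keys)
    if unused:
        print(f"Unused or unrecognized kwargs: {sorted(unused)}.")

kwargs = {"do_resize": True, "return_tensors": "pt"}

# Old check: only the annotated keys count as valid, so "return_tensors" is flagged.
validate_kwargs(kwargs.keys(), VideosKwargs.__annotations__.keys())

# New check: "return_tensors" is whitelisted alongside the annotated keys.
validate_kwargs(kwargs.keys(), list(VideosKwargs.__annotations__.keys()) + ["return_tensors"])

Whitelisting at the validation call site, rather than adding return_tensors to the annotations, keeps the kwargs class describing only real processing options while still letting the output-format flag pass through.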
|
Loading…
Reference in New Issue
Block a user