mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Models] Add Qwen3-VL Model Support (#5763)
* support v1 loader * remove useless code * remove useless * [Model] support Qwen3VL images success * [Model] support Qwen3VL rope_3d * [Model] support Qwen3VL remove log * [Model] support Qwen3VL RL * [Model] support Qwen3VL tp * [Model] support Qwen3VL video * [Model] support Qwen3VL fix ernievl * [Model] support Qwen3VL fix get_image_boundaries.cc array out of bounds * [Model] support Qwen3VL fix multi card * [Model] support Qwen3VL file close * [Model] support Qwen3VL fix ce * [Model] support Qwen3VL fix unittest * [Model] support Qwen3VL add unittest --------- Co-authored-by: Ayakouji <yuhongh@qq.com>
This commit is contained in:
@@ -14,19 +14,22 @@
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import math
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
from .image_processor import ceil_by_factor, floor_by_factor
|
||||
|
||||
|
||||
def sample_frames(
|
||||
frame_factor: int,
|
||||
min_frames: int,
|
||||
max_frames: int,
|
||||
metadata: Optional[dict] = None,
|
||||
fps: Optional[Union[int, float]] = None,
|
||||
num_frames: Optional[int] = None,
|
||||
fps: Optional[Union[int, float]] = -1,
|
||||
num_frames: Optional[int] = -1,
|
||||
):
|
||||
"""
|
||||
Sample frames from video according to specified criteria.
|
||||
@@ -61,16 +64,31 @@ def sample_frames(
|
||||
"Asked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. "
|
||||
"Please pass in `VideoMetadata` object or use a fixed `num_frames` per input video"
|
||||
)
|
||||
max_frames = math.floor(min(max_frames, total_num_frames) / frame_factor) * frame_factor
|
||||
# max_frames = math.floor(min(max_frames, total_num_frames) / frame_factor) * frame_factor
|
||||
min_frames = ceil_by_factor(min_frames, frame_factor)
|
||||
max_frames = floor_by_factor(min(max_frames, total_num_frames), frame_factor)
|
||||
|
||||
num_frames = total_num_frames / metadata["fps"] * fps
|
||||
|
||||
if num_frames > total_num_frames:
|
||||
data_processor_logger.warning(f"smart_nframes: nframes[{num_frames}] > total_frames[{total_num_frames}]")
|
||||
|
||||
num_frames = min(min(max(num_frames, min_frames), max_frames), total_num_frames)
|
||||
num_frames = math.floor(num_frames / frame_factor) * frame_factor
|
||||
num_frames = floor_by_factor(num_frames, frame_factor)
|
||||
|
||||
if num_frames > total_num_frames:
|
||||
raise ValueError(
|
||||
f"Video can't be sampled. The inferred `num_frames={num_frames}` exceeds `total_num_frames={total_num_frames}`. "
|
||||
"Decrease `num_frames` or `fps` for sampling."
|
||||
)
|
||||
|
||||
# Hack code ensures that num_frames can always be divided by 4
|
||||
# due to sched/resource_manager_v1.py 中 grid_thw.extend([[2, h, w]] * (t // 2))
|
||||
if num_frames > 2 and num_frames % 4 != 0:
|
||||
num_frames = (num_frames // 4) * 4 # 向下取整到 4 的倍数
|
||||
total_num_frames = (total_num_frames // 4) * 4
|
||||
num_frames = min(min(max(num_frames, min_frames), max_frames), total_num_frames)
|
||||
|
||||
# Calculate frame indices based on sampling strategy
|
||||
if num_frames > 0:
|
||||
# Evenly spaced sampling for target frame count
|
||||
|
||||
Reference in New Issue
Block a user