mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-25 09:57:51 +08:00
[Optimization] The pre- and post-processing pipeline do not perform dict conversion (#5494)
* to_request_for_infer initial commit * refact to from_chat_completion_request * preprocess use request initial commit * bugfix * processors refact to using request * bug fix * refact Request from_generic_request * post process initial commit * bugfix * postprocess second commit * bugfix * serving_embedding initial commit * serving_reward initial commit * bugfix * replace function name * async_llm initial commit * offline initial commit and fix bug * bugfix * fix async_llm * remove add speculate_metrics into data * fix logprobs bug * fix echo bug * fix bug * fix reasoning_max_tokens * bugfix * bugfix and modify unittest * bugfix and modify unit test * bugfix * bugfix * bugfix * modify unittest * fix error when reasong_content is none for text_processor * remove some unnessary logic * revert removed logic * implement add and set method for RequestOutput and refact code * modify unit test * modify unit test * union process_request and process_request_obj * remove a unit test * union process_response and process_response_obj * support qwen3_vl_processor * modify unittest and remove comments * fix prompt_logprobs * fix codestyle * add v1 * v1 * fix unit test * fix unit test * fix pre-commit * fix * add process request * add process request * fix * fix * fix unit test * fix unit test * fix unit test * fix unit test * fix unit test * remove file * add unit test * add unit test * add unit test * fix unit test * fix unit test * fix * fix --------- Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Co-authored-by: luukunn <981429396@qq.com> Co-authored-by: luukunn <83932082+luukunn@users.noreply.github.com> Co-authored-by: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com>
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import threading
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
# All scratch directories live under one relative root so they can be cleaned
# up together. The paths are relative to the process working directory.
_DOWNLOAD_ROOT = "./download_tmp"

# Downloaded, not yet decoded, video files.
RAW_VIDEO_DIR = _DOWNLOAD_ROOT + "/raw_video/"
# Downloaded, not yet decoded, image files.
RAW_IMAGE_DIR = _DOWNLOAD_ROOT + "/raw_images/"
# Frames extracted from videos.
EXTRACTED_FRAME_DIR = _DOWNLOAD_ROOT + "/extracted_frames/"
# Staging area for uploads.
TMP_DIR = _DOWNLOAD_ROOT + "/upload_tmp/"
|
||||
|
||||
|
||||
def file_download(url, download_dir, save_to_disk=False, retry=0, retry_interval=3):
    """
    Resolve ``url`` to its content, optionally persisting it on disk.

    Args:
        url: What to fetch. Handled in this order:
            * a ``PIL.Image.Image`` or ``VideoReaderWrapper`` is returned unchanged;
            * a string starting with ``"http"`` is fetched with ``requests``;
            * a path to an existing local file is read from disk;
            * anything else is decoded as base64 data.
        download_dir: Directory the payload is written to. Only used when
            ``save_to_disk`` is True.
        save_to_disk: When True, return a file path instead of the raw bytes.
        retry: Number of additional HTTP attempts made after the first one
            fails. Only applies to the ``"http"`` branch.
        retry_interval: Seconds to sleep between two HTTP attempts.

    Returns:
        The unchanged input object, the payload bytes, or (when
        ``save_to_disk`` is True) the path of the file holding the payload.
        An existing local file is not copied: its own path is returned.

    Raises:
        requests.RequestException: If the HTTP download still fails (network
            error or 4xx/5xx status) after all retries.
    """
    import time

    from .video_utils import VideoReaderWrapper

    # Already-decoded media objects need no download.
    if isinstance(url, (Image.Image, VideoReaderWrapper)):
        return url

    if url.startswith("http"):
        attempts_left = max(int(retry or 0), 0)
        while True:
            try:
                response = requests.get(url)
                # Do not hand an HTML error page to the caller as if it were
                # image/video bytes.
                response.raise_for_status()
                break
            except requests.RequestException:
                if attempts_left <= 0:
                    raise
                attempts_left -= 1
                time.sleep(retry_interval)
        bytes_data = response.content
    elif os.path.isfile(url):
        if save_to_disk:
            # The file is already on disk; reuse it instead of copying.
            return url
        with open(url, "rb") as local_file:
            bytes_data = local_file.read()
    else:
        bytes_data = base64.b64decode(url)

    if not save_to_disk:
        return bytes_data

    download_path = os.path.join(download_dir, get_filename(url))
    Path(download_path).parent.mkdir(parents=True, exist_ok=True)
    with open(download_path, "wb") as f:
        f.write(bytes_data)
    return download_path
|
||||
|
||||
|
||||
def get_filename(url=None):
    """
    Build a file name (without extension) for a downloaded resource.

    With no ``url`` a random 32-character hex string is returned. Otherwise
    the name is ``<year>-<month>-<day>-<pid>-<thread id>-<md5 of url>``, where
    ``url`` may be ``str`` (encoded as UTF-8 before hashing) or ``bytes``.
    """
    if url is None:
        return uuid.uuid4().hex

    now = datetime.datetime.now()
    raw = url if isinstance(url, bytes) else url.encode("utf-8")
    digest = hashlib.md5(raw).hexdigest()

    # No file extension on purpose: a suffix made saving as JPEG fail.
    date_part = f"{now.year}-{now.month:02d}-{now.day:02d}"
    return "-".join((date_part, str(os.getpid()), str(threading.get_ident()), digest))
|
||||
|
||||
|
||||
def get_downloadable(
    url,
    download_dir=RAW_VIDEO_DIR,
    save_to_disk=False,
    retry=0,
    retry_interval=3,
):
    """
    Make sure ``download_dir`` exists, then fetch ``url`` via ``file_download``.

    Args:
        url: Resource to fetch; forwarded to ``file_download``.
        download_dir: Directory used when ``save_to_disk`` is True. It is
            created if missing, whatever the value of ``save_to_disk``.
        save_to_disk: When True the downloaded **path** is returned,
            otherwise the downloaded **bytes**.
        retry: Forwarded to ``file_download``.
        retry_interval: Forwarded to ``file_download``.

    Returns:
        Whatever ``file_download`` returns for these arguments.
    """
    if not os.path.exists(download_dir):
        # exist_ok=True closes the window in which another thread or process
        # creates the directory between the check above and this call.
        os.makedirs(download_dir, exist_ok=True)
    return file_download(
        url,
        download_dir,
        save_to_disk=save_to_disk,
        retry=retry,
        retry_interval=retry_interval,
    )
|
||||
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
# Absolute directory containing this module; the bundled font sits next to it.
cur_directory = Path(__file__).absolute().parent
# Default TrueType font used when rendering timestamps (kept as a plain str).
FONT_PATH = os.path.join(str(cur_directory), "Roboto-Regular.ttf")
|
||||
|
||||
|
||||
def render_single_image_with_timestamp(image: Image, number: str, rate: float, font_path: str = FONT_PATH):
    """
    Draw ``number`` onto the top-left corner of ``image``.

    The font size is ``min(width, height) * rate``. The text is black with a
    white outline whose width is 10% of the font size. The drawing happens on
    ``image`` itself, and that same object is returned.
    """
    canvas = ImageDraw.Draw(image)
    img_width, img_height = image.size

    text_px = int(min(img_width, img_height) * rate)
    stroke_px = int(text_px * 0.1)
    typeface = ImageFont.truetype(font_path, text_px)

    # Black glyphs on a white stroke, anchored at the image origin.
    canvas.text(
        (0, 0),
        number,
        font=typeface,
        fill=(0, 0, 0),
        stroke_width=stroke_px,
        stroke_fill=(255, 255, 255),
    )
    return image
|
||||
|
||||
|
||||
def timestamp_converting(time_stamp_in_seconds):
    """
    Convert a duration in seconds to an ``HH:MM:SS.ss`` string.

    The value is rounded to centiseconds *before* it is split into fields, so
    a carry propagates correctly (59.999 becomes ``00:01:00.00`` rather than
    ``00:00:60.00``). The hour field grows beyond two digits when needed.

    Args:
        time_stamp_in_seconds: Duration in seconds (int or float).

    Returns:
        The formatted timestamp. Negative inputs keep the historical output:
        hours and minutes are ``00`` and the seconds field carries the sign.

    Raises:
        ValueError: If the input is NaN.
        OverflowError: If the input is infinite.
    """
    if time_stamp_in_seconds < 0:
        # Negative durations never entered the hour/minute reduction; keep
        # that output unchanged for existing callers.
        return f"00:00:{time_stamp_in_seconds:05.02f}"

    # Work in integer centiseconds so every field is exact and rounding can
    # carry into minutes and hours.
    total_centis = int(round(round(float(time_stamp_in_seconds), 2) * 100))
    whole_seconds, centis = divmod(total_centis, 100)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, mins = divmod(total_minutes, 60)

    return f"{hours:02d}:{mins:02d}:{secs:02d}.{centis:02d}"
|
||||
|
||||
|
||||
def get_timestamp_for_uniform_frame_extraction(num_frames, frame_id, duration):
    """
    Compute the timestamp of one frame under uniform frame sampling.

    num_frames: total number of frames
    frame_id: index of the sampled frame
    duration: total duration of the video
    return: ``duration * frame_id / num_frames`` as a number, in the same
        unit as ``duration`` (not a formatted string)
    """
    scaled_position = duration * 1.0 * frame_id
    return scaled_position / num_frames
|
||||
|
||||
|
||||
def render_frame_timestamp(frame, timestamp, font_rate=0.1):
    """
    Render a ``time: HH:MM:SS.ss`` label onto the top-left corner of a frame.

    frame: the frame, a PIL.Image object
    timestamp: the timestamp, in seconds
    font_rate: font size as a fraction of min(width, height)
    """
    label = f"time: {timestamp_converting(timestamp)}"
    return render_single_image_with_timestamp(frame, label, font_rate)
|
||||
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
from tempfile import NamedTemporaryFile as ntf
|
||||
|
||||
import decord
|
||||
|
||||
try:
|
||||
# moviepy 1.0
|
||||
import moviepy.editor as mp
|
||||
except:
|
||||
# moviepy 2.0
|
||||
import moviepy as mp
|
||||
|
||||
|
||||
def is_gif(data: bytes) -> bool:
    """
    Tell whether ``data`` starts with one of the two GIF magic headers.
    """
    header = data[:6]
    return header == b"GIF87a" or header == b"GIF89a"
|
||||
|
||||
|
||||
class VideoReaderWrapper(decord.VideoReader):
    """
    ``decord.VideoReader`` with a memory-leak workaround and GIF support.

    Workaround for https://github.com/dmlc/decord/issues/208: the reader is
    rewound with ``seek(0)`` after construction and after every
    ``__getitem__``.

    GIF input (a ``.gif`` path, GIF ``bytes`` or a GIF ``io.BytesIO``) is first
    transcoded to a temporary ``.mp4`` with moviepy, and decord opens that
    file instead.

    NOTE(review): ``original_file`` is deleted when the wrapper is finalized.
    It is the temporary ``.mp4`` for GIF input, but for any other ``str``
    input it is the path the caller passed in -- confirm that deleting the
    caller's file is intended.
    """

    def __init__(self, video_path, *args, **kwargs):
        # Set first so __del__ can rely on it even if construction fails.
        self.original_file = None

        with ntf(delete=True, suffix=".gif") as gif_file:
            gif_input = None
            if isinstance(video_path, str):
                self.original_file = video_path
                if video_path.lower().endswith(".gif"):
                    gif_input = video_path
            elif isinstance(video_path, bytes):
                if is_gif(video_path):
                    gif_file.write(video_path)
                    # moviepy re-opens the file by name, so the buffered
                    # bytes must reach the disk first.
                    gif_file.flush()
                    gif_input = gif_file.name
            elif isinstance(video_path, io.BytesIO):
                video_path.seek(0)
                tmp_bytes = video_path.read()
                # Rewind so decord can still read the stream if it is not a GIF.
                video_path.seek(0)
                if is_gif(tmp_bytes):
                    gif_file.write(tmp_bytes)
                    gif_file.flush()
                    gif_input = gif_file.name

            if gif_input is not None:
                mp4_file = ntf(delete=False, suffix=".mp4")
                # Only the name is needed; close our handle so it does not leak.
                mp4_file.close()
                # Track the temp file before encoding: a failed conversion is
                # then cleaned up by __del__, and the source GIF is not.
                self.original_file = mp4_file.name

                clip = mp.VideoFileClip(gif_input)
                try:
                    # `verbose` is deprecated in moviepy 1.x and no longer
                    # accepted by moviepy 2.x; logger=None silences output.
                    clip.write_videofile(mp4_file.name, logger=None)
                finally:
                    clip.close()
                video_path = mp4_file.name

            super().__init__(video_path, *args, **kwargs)
            self.seek(0)

    def __getitem__(self, key):
        frames = super().__getitem__(key)
        # Rewind after every read (decord issue 208 workaround).
        self.seek(0)
        return frames

    def __del__(self):
        # Chain to the parent finalizer, if there is one, so decord can
        # release its own resources before the backing file is removed.
        parent_finalizer = getattr(super(), "__del__", None)
        if parent_finalizer is not None:
            try:
                parent_finalizer()
            except AttributeError:
                # __init__ failed before the parent initialised its state.
                pass

        original_file = getattr(self, "original_file", None)
        if original_file and os.path.exists(original_file):
            try:
                os.remove(original_file)
            except OSError:
                # Best-effort cleanup: the file may already be gone or locked.
                pass
|
||||
Reference in New Issue
Block a user