mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] consider multimodal model when dummy run (#6045)
* add mm do profile * update code * update code * update code * update code * update test case * update code * update code * fix xpu bug * update code * add mm do profile * update test case * update code
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
"""
|
||||
|
||||
import traceback
|
||||
from collections.abc import Mapping
|
||||
|
||||
import numpy as np
|
||||
from paddleformers.generation import GenerationConfig
|
||||
@@ -318,3 +319,16 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
|
||||
outs["position_ids"] = np.array(outs["position_ids"], dtype=np.int64)
|
||||
outs["mm_num_token_func"] = self.ernie4_5_processor.mm_num_tokens
|
||||
return outs
|
||||
|
||||
def get_mm_max_tokens_per_item(
    self,
    seq_len: int,
) -> Mapping[str, int]:
    """Return the per-item token ceiling for each supported modality.

    Args:
        seq_len: Maximum model length.

    Returns:
        A mapping from modality name to its maximum token count,
        as computed by the wrapped ernie4_5 data processor.
    """
    # Delegate straight to the underlying data processor.
    delegate = self.ernie4_5_processor
    return delegate.get_mm_max_tokens_per_item(seq_len)
|
||||
|
||||
@@ -20,6 +20,7 @@ import copy
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -32,7 +33,7 @@ from fastdeploy.engine.request import ImagePosition
|
||||
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
|
||||
from fastdeploy.input.ernie4_5_tokenizer import Ernie4_5Tokenizer
|
||||
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
|
||||
from fastdeploy.input.utils import IDS_TYPE_FLAG
|
||||
from fastdeploy.input.utils import IDS_TYPE_FLAG, MAX_IMAGE_DIMENSION
|
||||
from fastdeploy.multimodal.hasher import MultimodalHasher
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
@@ -745,3 +746,45 @@ class DataProcessor(MMBaseDataProcessor):
|
||||
req = pickle.dumps((mm_hashes, mm_items))
|
||||
socket.send_multipart([b"", req])
|
||||
data_processor_logger.info(f"Update cache of mm_hashes: {mm_hashes}")
|
||||
|
||||
def get_image_size_with_most_features(self):
    """Return the (height, width) image size that yields the most visual features.

    Feeds the largest allowed square (MAX_IMAGE_DIMENSION per side) through
    the smart-resize policy under the configured image pixel budget and
    returns the resized dimensions (first element of the resize result).
    """
    resize_result = self.image_preprocessor.get_smarted_resize(
        height=MAX_IMAGE_DIMENSION,
        width=MAX_IMAGE_DIMENSION,
        min_pixels=self.image_min_pixels,
        max_pixels=self.image_max_pixels,
    )
    best_height, best_width = resize_result[0]
    return (best_height, best_width)
|
||||
|
||||
def get_max_image_tokens(
    self,
    seq_len: int,
) -> int:
    """Upper bound on the tokens a single image can contribute, capped at seq_len.

    The most-feature image size is resized once more to obtain its patch
    grid; the patch count is then divided by the spatial merge window
    (spatial_conv_size ** 2) to get the token count.

    Args:
        seq_len: Maximum model length used as the hard cap.

    Returns:
        The per-image token count, never exceeding ``seq_len``.
    """
    height, width = self.get_image_size_with_most_features()
    # Second element of the smart-resize result is the patch grid.
    patch_grid = self.image_preprocessor.get_smarted_resize(
        height=height,
        width=width,
        min_pixels=self.image_min_pixels,
        max_pixels=self.image_max_pixels,
    )[1]
    patch_rows, patch_cols = patch_grid
    merge_window = self.spatial_conv_size**2
    token_count = (patch_rows * patch_cols) // merge_window
    return min(token_count, seq_len)
|
||||
|
||||
def get_max_video_tokens(self, seq_len: int) -> int:
    """Upper bound on the tokens a single video can contribute, capped at seq_len.

    Same patch-grid computation as images, but uses the video pixel budget
    and additionally divides by the temporal merge factor
    (spatial_conv_size ** 2 * temporal_conv_size).

    Args:
        seq_len: Maximum model length used as the hard cap.

    Returns:
        The per-video token count, never exceeding ``seq_len``.
    """
    height, width = self.get_image_size_with_most_features()
    # Second element of the smart-resize result is the patch grid.
    patch_grid = self.image_preprocessor.get_smarted_resize(
        height=height,
        width=width,
        min_pixels=self.video_min_pixels,
        max_pixels=self.video_max_pixels,
    )[1]
    patch_rows, patch_cols = patch_grid
    merge_window = self.spatial_conv_size**2 * self.temporal_conv_size
    token_count = (patch_rows * patch_cols) // merge_window
    return min(token_count, seq_len)
|
||||
|
||||
def get_mm_max_tokens_per_item(
    self,
    seq_len: int,
) -> Mapping[str, int]:
    """Per-modality maximum token counts for a single multimodal item.

    Args:
        seq_len: Maximum model length; each modality's count is capped at it.

    Returns:
        A mapping with keys ``"image"`` and ``"video"``.
    """
    return {
        "image": self.get_max_image_tokens(seq_len),
        "video": self.get_max_video_tokens(seq_len),
    }
|
||||
|
||||
Reference in New Issue
Block a user