[Feature] Consider multimodal models during the dummy run (#6045)

* add mm do profile

* update code

* update code

* update code

* update code

* update test case

* update code

* update code

* fix xpu bug

* update code

* add mm do profile

* update test case

* update code
This commit is contained in:
kevin
2026-02-09 17:49:55 +08:00
committed by GitHub
parent 783d56e28a
commit d60daca4a8
25 changed files with 166 additions and 19 deletions
+9
View File
@@ -163,6 +163,7 @@ class EngineService:
)
self.bos_client = None
self.mm_max_tokens_per_item = None
self.guided_decoding_checker = None
if self.cfg.structured_outputs_config.guided_decoding_backend != "off":
self.guided_decoding_checker = schema_checker(
@@ -273,6 +274,12 @@ class EngineService:
self.cfg.tool_parser,
)
self.data_processor = self.input_processor.create_processor()
self.mm_max_tokens_per_item = self.data_processor.get_mm_max_tokens_per_item(
self.cfg.model_config.max_model_len
)
if self.mm_max_tokens_per_item is not None:
max_chunk_tokens = self.cfg.get_max_chunk_tokens(self.mm_max_tokens_per_item)
self.cfg.cache_config.postprocess(max_chunk_tokens, self.cfg.scheduler_config.max_num_seqs)
def _init_worker_monitor_signals(self): # exist_task_signal 用于各worker进程感知是否有新Task需要处理
current_suffix = self.cfg.parallel_config.local_engine_worker_queue_port
@@ -1998,6 +2005,8 @@ class EngineService:
)
if self.cfg.structured_outputs_config.logits_processors is not None:
arguments += f" --logits-processors {' '.join(self.cfg.structured_outputs_config.logits_processors)}"
if self.mm_max_tokens_per_item is not None:
arguments += f" --mm_max_tokens_per_item '{json.dumps(self.mm_max_tokens_per_item)}'"
worker_store_true_flag = {
"enable_expert_parallel": self.cfg.parallel_config.enable_expert_parallel,