[Loader] add multi-thread model loading (#6877)

* multi-thread-loader

* fix unit tests (UT)
This commit is contained in:
bukejiyu
2026-04-10 14:40:15 +08:00
committed by GitHub
parent c1fb3112f8
commit 14d46181b8
12 changed files with 105 additions and 7 deletions
+7 -1
View File
@@ -45,7 +45,13 @@ def _make_cfg(**ov):
cc.enable_prefix_caching = cc.enable_chunked_prefill = False
cc.kv_cache_ratio, cc.kvcache_storage_backend, cc.num_cpu_blocks, cc.max_encoder_cache = 1.0, None, 0, 0
cc.cache_transfer_protocol, cc.total_block_num = "tcp", 100
lc = ns(load_strategy="auto", rsync_config={}, dynamic_load_weight=False, load_choices="auto")
lc = ns(
load_strategy="auto",
rsync_config={},
dynamic_load_weight=False,
load_choices="auto",
model_loader_extra_config={},
)
soc = ns(guided_decoding_backend=None, logits_processors=None, reasoning_parser="none")
soc.disable_any_whitespace = False
cfg = ns(model_config=mc, parallel_config=pc, scheduler_config=sc, cache_config=cc, load_config=lc)
+1
View File
@@ -99,5 +99,6 @@ def test_offline_model(
quantization,
"default_v1",
prompts,
{"enable_multithread_load": True, "num_threads": 2},
),
)
+2
View File
@@ -89,6 +89,7 @@ def form_model_get_output_topp0(
load_choices,
prompts,
speculative_config={},
model_loader_extra_config=None,
result_queue=None,
):
try:
@@ -100,6 +101,7 @@ def form_model_get_output_topp0(
load_choices=load_choices,
quantization=quantization,
speculative_config=speculative_config,
model_loader_extra_config=model_loader_extra_config,
) as fd_model:
fd_outputs = fd_model.generate_topp0(prompts, max_tokens=max_tokens)
result_queue.put(fd_outputs)