mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Loader] add multi-thread model loading (#6877)
* multi-thread-loader
* fix ut
This commit is contained in:
@@ -45,7 +45,13 @@ def _make_cfg(**ov):
|
||||
cc.enable_prefix_caching = cc.enable_chunked_prefill = False
|
||||
cc.kv_cache_ratio, cc.kvcache_storage_backend, cc.num_cpu_blocks, cc.max_encoder_cache = 1.0, None, 0, 0
|
||||
cc.cache_transfer_protocol, cc.total_block_num = "tcp", 100
|
||||
lc = ns(load_strategy="auto", rsync_config={}, dynamic_load_weight=False, load_choices="auto")
|
||||
lc = ns(
|
||||
load_strategy="auto",
|
||||
rsync_config={},
|
||||
dynamic_load_weight=False,
|
||||
load_choices="auto",
|
||||
model_loader_extra_config={},
|
||||
)
|
||||
soc = ns(guided_decoding_backend=None, logits_processors=None, reasoning_parser="none")
|
||||
soc.disable_any_whitespace = False
|
||||
cfg = ns(model_config=mc, parallel_config=pc, scheduler_config=sc, cache_config=cc, load_config=lc)
|
||||
|
||||
@@ -99,5 +99,6 @@ def test_offline_model(
|
||||
quantization,
|
||||
"default_v1",
|
||||
prompts,
|
||||
{"enable_multithread_load": True, "num_threads": 2},
|
||||
),
|
||||
)
|
||||
|
||||
@@ -89,6 +89,7 @@ def form_model_get_output_topp0(
|
||||
load_choices,
|
||||
prompts,
|
||||
speculative_config={},
|
||||
model_loader_extra_config=None,
|
||||
result_queue=None,
|
||||
):
|
||||
try:
|
||||
@@ -100,6 +101,7 @@ def form_model_get_output_topp0(
|
||||
load_choices=load_choices,
|
||||
quantization=quantization,
|
||||
speculative_config=speculative_config,
|
||||
model_loader_extra_config=model_loader_extra_config,
|
||||
) as fd_model:
|
||||
fd_outputs = fd_model.generate_topp0(prompts, max_tokens=max_tokens)
|
||||
result_queue.put(fd_outputs)
|
||||
|
||||
Reference in New Issue
Block a user