mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[CE]add 21b mooncake yaml (#7033)
* [CE]add 21b cpu cache, glm mtp, glm for rl config
* [CE]add 21b tp2 yaml
* [CE]add 21b mooncake yaml
* add fastdeploy benchmark,paddletest-155
* [CE] adjust vl wint4 config
* [CE]add glm mtp with updatemodel config
* [CE]fix
* fix
* test
* test
* test

---------

Co-authored-by: xiegegege <>
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
max_model_len: 32768
|
||||
max_num_seqs: 128
|
||||
tensor_parallel_size: 4
|
||||
graph_optimization_config:
|
||||
use_cudagraph: True
|
||||
draft_model_use_cudagraph: True
|
||||
load_choices: "default_v1"
|
||||
dynamic_load_weight: True
|
||||
load_strategy: ipc_snapshot
|
||||
shutdown_comm_group_if_worker_idle: False
|
||||
@@ -0,0 +1,5 @@
|
||||
max_model_len: 131072
|
||||
max_num_seqs: 256
|
||||
tensor_parallel_size: 2
|
||||
kvcache_storage_backend: "mooncake"
|
||||
enable_output_caching: True
|
||||
@@ -7,3 +7,4 @@ tensor_parallel_size: 8
|
||||
quantization: wint4
|
||||
limit_mm_per_prompt: '{"image": 100, "video": 100}'
|
||||
reasoning_parser: ernie-45-vl
|
||||
max_num_batched_tokens: 4096
|
||||
|
||||
Reference in New Issue
Block a user