mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[CE]add 21b mooncake yaml (#7033)
* [CE]add 21b cpu cache ,glm mtp,glm for rl config * [CE]add 21b tp2 yaml * [CE]add 21b mooncake yaml * add fastdeploy benchmark,paddletest-155 * [CE] adjust vl wint4 config * [CE]add glm mtp with updatemodel config * [CE]fix * fix * test * test * test --------- Co-authored-by: xiegegege <>
This commit is contained in:
@@ -0,0 +1,10 @@
|
|||||||
|
max_model_len: 32768
|
||||||
|
max_num_seqs: 128
|
||||||
|
tensor_parallel_size: 4
|
||||||
|
graph_optimization_config:
|
||||||
|
use_cudagraph: True
|
||||||
|
draft_model_use_cudagraph: True
|
||||||
|
load_choices: "default_v1"
|
||||||
|
dynamic_load_weight: True
|
||||||
|
load_strategy: ipc_snapshot
|
||||||
|
shutdown_comm_group_if_worker_idle: False
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
max_model_len: 131072
|
||||||
|
max_num_seqs: 256
|
||||||
|
tensor_parallel_size: 2
|
||||||
|
kvcache_storage_backend: "mooncake"
|
||||||
|
enable_output_caching: True
|
||||||
@@ -7,3 +7,4 @@ tensor_parallel_size: 8
|
|||||||
quantization: wint4
|
quantization: wint4
|
||||||
limit_mm_per_prompt: '{"image": 100, "video": 100}'
|
limit_mm_per_prompt: '{"image": 100, "video": 100}'
|
||||||
reasoning_parser: ernie-45-vl
|
reasoning_parser: ernie-45-vl
|
||||||
|
max_num_batched_tokens: 4096
|
||||||
|
|||||||
Reference in New Issue
Block a user