[CE]add 21b mooncake yaml (#7033)

* [CE]add 21b cpu cache, glm mtp, glm for rl config
* [CE]add 21b tp2 yaml
* [CE]add 21b mooncake yaml
* add fastdeploy benchmark, paddletest-155
* [CE] adjust vl wint4 config
* [CE]add glm mtp with updatemodel config
* [CE]fix
* fix
* test
* test
* test

---------

Co-authored-by: xiegegege <>
2026-04-22 16:07:51 +08:00 · 2026-03-26 20:01:05 +08:00
parent 442514252c
commit 209e5cf7f4
3 changed files with 16 additions and 0 deletions
@@ -0,0 +1,10 @@
+max_model_len: 32768
+max_num_seqs: 128
+tensor_parallel_size: 4
+graph_optimization_config:
+  use_cudagraph: True
+  draft_model_use_cudagraph: True
+load_choices: "default_v1"
+dynamic_load_weight: True
+load_strategy: ipc_snapshot
+shutdown_comm_group_if_worker_idle: False
@@ -0,0 +1,5 @@
+max_model_len: 131072
+max_num_seqs: 256
+tensor_parallel_size: 2
+kvcache_storage_backend: "mooncake"
+enable_output_caching: True
@@ -7,3 +7,4 @@ tensor_parallel_size: 8
 quantization: wint4
 limit_mm_per_prompt: '{"image": 100, "video": 100}'
 reasoning_parser: ernie-45-vl
+max_num_batched_tokens: 4096