mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
209e5cf7f4
* [CE]add 21b cpu cache ,glm mtp,glm for rl config * [CE]add 21b tp2 yaml * [CE]add 21b mooncake yaml * add fastdeploy benchmark,paddletest-155 * [CE] adjust vl wint4 config * [CE]add glm mtp with updatemodel config * [CE]fix * fix * test * test * test --------- Co-authored-by: xiegegege <>
11 lines
269 B
YAML
11 lines
269 B
YAML
# Engine/serving options. The notes on individual keys are inferred from the
# key names only -- TODO confirm against the consuming tool's documentation.

# presumably the per-request sequence-length limit -- verify
max_model_len: 32768
# presumably the cap on concurrently scheduled sequences -- verify
max_num_seqs: 128
tensor_parallel_size: 4

# NOTE(review): the two cudagraph switches are assumed to be children of
# graph_optimization_config; a bare `graph_optimization_config:` with no
# nested keys would load as null. Confirm the nesting against the consumer's
# schema.
graph_optimization_config:
  use_cudagraph: true
  draft_model_use_cudagraph: true

# Weight-loading options. Quoted / plain string values are kept exactly as
# given; booleans are written in canonical lowercase form.
load_choices: "default_v1"
dynamic_load_weight: true
load_strategy: ipc_snapshot
shutdown_comm_group_if_worker_idle: false