mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
209e5cf7f4
* [CE]add 21b cpu cache ,glm mtp,glm for rl config * [CE]add 21b tp2 yaml * [CE]add 21b mooncake yaml * add fastdeploy benchmark,paddletest-155 * [CE] adjust vl wint4 config * [CE]add glm mtp with updatemodel config * [CE]fix * fix * test * test * test --------- Co-authored-by: xiegegege <>
11 lines
257 B
YAML
11 lines
257 B
YAML
# Flat key/value settings for one model-serving configuration.
# NOTE(review): presumably loaded as FastDeploy launch arguments for a
# multimodal (VL) wint4 run; confirm against the consuming loader.

# Boolean written in canonical lowercase form.
enable_mm: true

# Sequence-length and concurrency settings.
max_model_len: 32768
max_num_seqs: 56
max_num_batched_tokens: 4096

# Memory-related ratios (both are fractions between 0 and 1 here).
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.8

tensor_parallel_size: 8
quantization: wint4

# The value is a single-quoted string that contains JSON, not a YAML mapping.
# Keep the quotes: unquoted, the leading `{` would be parsed as a flow mapping.
limit_mm_per_prompt: '{"image": 100, "video": 100}'

reasoning_parser: ernie-45-vl