# Mirror of https://github.com/PaddlePaddle/FastDeploy.git
# Synced 2026-04-23 00:17:25 +08:00
# 11 lines, 258 B, YAML
# Flat key/value settings, one option per line.
# NOTE(review): the meaning of each option is inferred from its name only;
# confirm against the consuming tool's documentation before changing values.
tensor_parallel_size: 8
max_num_seqs: 32
gpu_memory_utilization: 0.8
load_choices: default_v1
enable_prefix_caching: true
# Single-quoted so YAML loads the value as a string holding JSON text instead
# of parsing the braces as a flow mapping; the inner "true" is JSON, not YAML.
graph_optimization_config: '{"use_cudagraph":true}'
max_model_len: 66560
enable_logprob: true
enable_custom_all_reduce: false
worker: 2