Files
FastDeploy/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml
T
2025-10-14 15:04:06 +08:00

8 lines
164 B
YAML

# Benchmark configuration for the "x1-a3b-128k-wint8-h800-tp1" profile
# (profile name taken from this file's path under FastDeploy/benchmarks/yaml/).
# NOTE(review): the per-key notes below are inferred from the key names and the
# file name only; confirm exact semantics against the FastDeploy option reference.

# Matches the "tp1" suffix of the file name; presumably the tensor-parallel degree.
tensor_parallel_size: 1
# 131072 = 128 * 1024, matching "128k" in the file name; presumably the maximum
# context length in tokens -- TODO confirm the unit.
max_model_len: 131072
# Presumably the upper bound on concurrently scheduled sequences -- TODO confirm.
max_num_seqs: 32
# Both parsers are set to the same "ernie_x1" identifier.
reasoning_parser: ernie_x1
tool_call_parser: ernie_x1
# Explicitly quoted string; presumably selects the model-loading path -- verify
# the accepted values against the consumer of this file.
load_choices: "default_v1"
# Matches "wint8" in the file name; presumably weight-only INT8 quantization --
# TODO confirm.
quantization: wint8