# File: FastDeploy/tests/ce/deploy/ernie45t_21b_sot_fp8.yaml
# (File-viewer metadata from the original listing: 10 lines, 220 B, YAML.)

# Deployment settings for the ERNIE-4.5 21B SOT + FP8 CE test case
# (purpose inferred from the file name -- confirm against the CE harness).

# Serving limits: maximum context length and maximum concurrent sequences.
# NOTE(review): exact semantics are defined by the FastDeploy engine args.
max_model_len: 32768
max_num_seqs: 128

# Single-device run (no tensor-parallel sharding).
tensor_parallel_size: 1

# Quantization scheme name passed through to the engine.
quantization: block_wise_fp8

# Graph-optimization options must be nested under this key. Left unindented,
# `graph_optimization_config` parses as null and the four options below turn
# into unrelated top-level keys.
graph_optimization_config:
  # Presumably 1 selects dynamic-to-static (SOT) conversion -- TODO confirm
  # against the FastDeploy graph-optimization documentation.
  graph_opt_level: 1
  # Sizes used for SOT warmup (presumably batch sizes -- TODO confirm).
  sot_warmup_sizes: [2, 16, 32, 64]
  # CUDA Graph is enabled, but full-graph mode is switched off.
  use_cudagraph: true
  full_cuda_graph: false