# Git patch that creates benchmarks/yaml/paddleocr-vl-16k-bf16.yaml as a new
# file (mode 100644) holding six flat top-level settings, all plain numeric
# scalars: max_model_len, max_num_seqs, max_num_batched_tokens,
# tensor_parallel_size, gpu_memory_utilization and workers.
# max_model_len and max_num_batched_tokens carry the same value (16384),
# matching the "16k" in the file name.
# NOTE(review): the key names look like inference-engine / benchmark-runner
# options, but the consumer of this file is not visible here; confirm each
# key's meaning against it.
# NOTE(review): confirm how workers: 4 interacts with
# gpu_memory_utilization: 0.7; if the fraction is per process and the workers
# share one device, the total would exceed the device's memory.
# NOTE(review): the file name says "bf16" but no dtype key is set here;
# presumably the precision is chosen elsewhere. Verify.
# These comment lines sit before the "diff --git" header, outside the hunk,
# so the hunk's "+1,6" line count is unaffected.
diff --git a/benchmarks/yaml/paddleocr-vl-16k-bf16.yaml b/benchmarks/yaml/paddleocr-vl-16k-bf16.yaml
new file mode 100644
index 0000000000..a5794f4337
--- /dev/null
+++ b/benchmarks/yaml/paddleocr-vl-16k-bf16.yaml
@@ -0,0 +1,6 @@
+max_model_len: 16384
+max_num_seqs: 256
+max_num_batched_tokens: 16384
+tensor_parallel_size: 1
+gpu_memory_utilization: 0.7
+workers: 4