mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Iluvatar] Optimize decode group_gemm and Support cuda graph for ernie (#6803)
This commit is contained in:
@@ -198,7 +198,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--max-model-len 32768 \
|
||||
--max-num-seqs 8 \
|
||||
--block-size 16 \
|
||||
--graph-optimization-config '{"use_cudagraph": false}' > server.log 2>&1 &
|
||||
--graph-optimization-config '{"use_cudagraph": true}' > server.log 2>&1 &
|
||||
|
||||
check_server_status
|
||||
|
||||
|
||||
Reference in New Issue
Block a user