[Iluvatar] Optimize decode group_gemm and Support cuda graph for ernie (#6803)

This commit is contained in:
yzwu
2026-03-12 19:21:17 +08:00
committed by GitHub
parent 250ce40b40
commit 901b38c936
9 changed files with 140 additions and 81 deletions
+1 -1
View File
@@ -198,7 +198,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
--max-model-len 32768 \
--max-num-seqs 8 \
--block-size 16 \
- --graph-optimization-config '{"use_cudagraph": false}' > server.log 2>&1 &
+ --graph-optimization-config '{"use_cudagraph": true}' > server.log 2>&1 &
check_server_status