[Iluvatar] Optimize decode group_gemm and support CUDA graph for ERNIE (#6803)

2026-04-23 00:17:25 +08:00 · 2026-03-12 19:21:17 +08:00
parent 250ce40b40
commit 901b38c936
9 changed files with 140 additions and 81 deletions
@@ -198,7 +198,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
       --max-model-len 32768 \
       --max-num-seqs 8 \
       --block-size 16 \
-       --graph-optimization-config '{"use_cudagraph": false}' > server.log 2>&1 &
+       --graph-optimization-config '{"use_cudagraph": true}' > server.log 2>&1 &

 check_server_status