[Iluvatar] Optimize decode group_gemm and Support cuda graph for ernie (#6803)

This commit is contained in:
yzwu
2026-03-12 19:21:17 +08:00
committed by GitHub
parent 250ce40b40
commit 901b38c936
9 changed files with 140 additions and 81 deletions
+1 -1
View File
@@ -7,7 +7,7 @@ on:
description: "Build Images"
required: true
type: string
-default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:3.3.0"
+default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:3.3.0-20260312"
FASTDEPLOY_ARCHIVE_URL:
description: "URL of the compressed FastDeploy code archive."
required: true
+1 -1
View File
@@ -19,5 +19,5 @@ jobs:
needs: [clone]
uses: ./.github/workflows/_iluvatar_cases.yml
with:
-DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:3.3.0
+DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:3.3.0-20260312
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}