Use triton qk_norm both in Prefill and Decode (#7213)

Co-authored-by: liuruian <liuruian@baidu.com>
This commit is contained in:
K11OntheBoat
2026-04-10 15:44:01 +08:00
committed by GitHub
parent 5c9fa43150
commit 870dbac370
2 changed files with 2 additions and 2 deletions
@@ -341,7 +341,7 @@ class QKRMSNorm(nn.Layer):
forward_meta,
proxy_rmsnorm=None,
) -> paddle.Tensor:
- if proxy_rmsnorm is None and self.qk_norm_fused and forward_meta.step_use_cudagraph:
+ if proxy_rmsnorm is None and self.qk_norm_fused:
qkv_out = qk_rmsnorm_fused(
qkv_out,
self.q_norm.weight,
+1 -1
View File
@@ -173,7 +173,7 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
content1 = result1["choices"][0]["message"]["content"]
# base result
- content2 = "视频中手机支架的颜色是黑色"
+ content2 = "视频中手机支架的颜色是黑色。"
# Verify that result is same as the base result
assert content1.startswith(content2), content1