Use Triton qk_norm in both prefill and decode (#7213)

Co-authored-by: liuruian <liuruian@baidu.com>
2026-04-23 00:17:25 +08:00 · 2026-04-10 15:44:01 +08:00
parent 5c9fa43150
commit 870dbac370
2 changed files with 2 additions and 2 deletions
@@ -341,7 +341,7 @@ class QKRMSNorm(nn.Layer):
        forward_meta,
        proxy_rmsnorm=None,
    ) -> paddle.Tensor:
-        if proxy_rmsnorm is None and self.qk_norm_fused and forward_meta.step_use_cudagraph:
+        if proxy_rmsnorm is None and self.qk_norm_fused:
            qkv_out = qk_rmsnorm_fused(
                qkv_out,
                self.q_norm.weight,
@@ -173,7 +173,7 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
    content1 = result1["choices"][0]["message"]["content"]

    # base result
-    content2 = "视频中手机支架的颜色是黑色的。"
+    content2 = "视频中手机支架的颜色是黑色。"

    # Verify that result is same as the base result
    assert content1.startswith(content2), content1