qk norm for speculate decode C16 (#3637)

2026-04-23 00:17:25 +08:00 · 2025-09-03 14:53:56 +08:00
parent d22d3de256
commit fa58a9fa8f
6 changed files with 470 additions and 160 deletions
@@ -277,7 +277,10 @@ void AppendAttentionKernel(
            exec_stream,
            &qkv_out,
            const_cast<paddle::Tensor*>(&key_cache),
-            const_cast<paddle::Tensor*>(&value_cache));
+            const_cast<paddle::Tensor*>(&value_cache),
+            q_norm_weight,
+            k_norm_weight,
+            rms_norm_eps);
      } else {
        SpeculateWriteCacheWithRoPEKernel<data_t, data_t>(
            meta_data,
@@ -300,7 +303,10 @@ void AppendAttentionKernel(
            exec_stream,
            &qkv_out,
            const_cast<paddle::Tensor*>(&key_cache),
-            const_cast<paddle::Tensor*>(&value_cache));
+            const_cast<paddle::Tensor*>(&value_cache),
+            q_norm_weight,
+            k_norm_weight,
+            rms_norm_eps);
      }
    } else {
      if (qkv_out_scales) {