qk norm for speculate decode C16 (#3637)

This commit is contained in:
Yuan Xiaolan
2025-09-03 14:53:56 +08:00
committed by GitHub
parent d22d3de256
commit fa58a9fa8f
6 changed files with 470 additions and 160 deletions
+8 -2
View File
@@ -277,7 +277,10 @@ void AppendAttentionKernel(
exec_stream,
&qkv_out,
const_cast<paddle::Tensor*>(&key_cache),
-const_cast<paddle::Tensor*>(&value_cache));
+const_cast<paddle::Tensor*>(&value_cache),
+q_norm_weight,
+k_norm_weight,
+rms_norm_eps);
} else {
SpeculateWriteCacheWithRoPEKernel<data_t, data_t>(
meta_data,
@@ -300,7 +303,10 @@ void AppendAttentionKernel(
exec_stream,
&qkv_out,
const_cast<paddle::Tensor*>(&key_cache),
-const_cast<paddle::Tensor*>(&value_cache));
+const_cast<paddle::Tensor*>(&value_cache),
+q_norm_weight,
+k_norm_weight,
+rms_norm_eps);
}
} else {
if (qkv_out_scales) {