[OP][Optimization] Remove ENABLE_PREFILL template parameter in multi_query_append_attention_warp1_4_kernel (#7201)

This commit is contained in:
周周周
2026-04-07 11:21:57 +08:00
committed by GitHub
parent 8cb417e8fb
commit 18f012457d
4 changed files with 32 additions and 66 deletions
@@ -146,6 +146,8 @@ class AppendAttentionBackend(AttentionBackend):
self.causal: bool = getattr(fd_config.model_config, "causal", True)
self.speculative_method = fd_config.speculative_config.method
self.speculate_max_draft_token_num: int = fd_config.speculative_config.num_speculative_tokens
if self.speculative_method is None:
self.speculate_max_draft_token_num = 0
self.keep_pd_step_flag: bool = fd_config.speculative_config.model_type == "mtp"
self.num_layers_draft_model: int = int(fd_config.speculative_config.method == SpecMethod.MTP)