[OP][Optimization] Remove ENABLE_PREFILL template parameter in multi_query_append_attention_warp1_4_kernel (#7201)

2026-04-24 01:29:57 +08:00 · 2026-04-07 11:21:57 +08:00
parent 8cb417e8fb
commit 18f012457d
4 changed files with 32 additions and 66 deletions
@@ -146,6 +146,8 @@ class AppendAttentionBackend(AttentionBackend):
        self.causal: bool = getattr(fd_config.model_config, "causal", True)
        self.speculative_method = fd_config.speculative_config.method
        self.speculate_max_draft_token_num: int = fd_config.speculative_config.num_speculative_tokens
+        if self.speculative_method is None:
+            self.speculate_max_draft_token_num = 0
        self.keep_pd_step_flag: bool = fd_config.speculative_config.model_type == "mtp"
        self.num_layers_draft_model: int = int(fd_config.speculative_config.method == SpecMethod.MTP)