Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-24 01:29:57 +08:00)
[OP][Optimization] Remove ENABLE_PREFILL template parameter in multi_query_append_attention_warp1_4_kernel (#7201)
This commit is contained in:
@@ -146,6 +146,8 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
self.causal: bool = getattr(fd_config.model_config, "causal", True)
|
||||
self.speculative_method = fd_config.speculative_config.method
|
||||
self.speculate_max_draft_token_num: int = fd_config.speculative_config.num_speculative_tokens
|
||||
if self.speculative_method is None:
|
||||
self.speculate_max_draft_token_num = 0
|
||||
self.keep_pd_step_flag: bool = fd_config.speculative_config.model_type == "mtp"
|
||||
self.num_layers_draft_model: int = int(fd_config.speculative_config.method == SpecMethod.MTP)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user