[MTP] refactor MTP pre_process (#6358)

This commit is contained in:
周周周
2026-02-09 10:47:15 +08:00
committed by GitHub
parent 18e79dd660
commit 2b4748de4f
24 changed files with 411 additions and 533 deletions
@@ -261,9 +261,6 @@ class FlashAttentionBackend(AttentionBackend):
)
# Note(ZKK): this must be kept consistent with append_attn_backend.py
self.max_partition_size: int = int(os.getenv("FLAGS_max_partition_size", 1024))
self.zero_seq_enc_lens_for_decode = paddle.zeros(
shape=[fd_config.scheduler_config.max_num_seqs, 1], dtype=paddle.int32
)
def get_attention_meta(self):
"""get_attention_meta"""