[MTP] support mtp chunk_prefill_v1 (#4366)

* support mtp chunk_prefill_v1

* fix mtp chunk_prefill output, fix unit test

* fix unit test

* fix save_output
This commit is contained in:
freeliuzc
2025-10-15 13:21:32 +08:00
committed by GitHub
parent ffe7af8a97
commit 582aebd48b
11 changed files with 118 additions and 58 deletions
@@ -64,7 +64,6 @@ else:
save_output,
save_output_topk,
set_stop_value_multi_ends,
speculate_clear_accept_nums,
speculate_get_output_padding_offset,
speculate_get_padding_offset,
speculate_get_seq_lens_output,
@@ -369,12 +368,13 @@ def post_process_specualate(
model_output.accept_tokens,
model_output.accept_num,
model_output.not_need_stop,
model_output.seq_lens_decoder,
model_output.prompt_lens,
model_output.mp_rank,
save_each_rank,
envs.ENABLE_V1_KVCACHE_SCHEDULER,
)
speculate_clear_accept_nums(model_output.accept_num, model_output.seq_lens_decoder)
# Update pre_ids through accept tokens
speculate_set_value_by_flags_and_idx(